servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- servalcat/__init__.py +10 -0
- servalcat/__main__.py +120 -0
- servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
- servalcat/refine/__init__.py +0 -0
- servalcat/refine/cgsolve.py +100 -0
- servalcat/refine/refine.py +1162 -0
- servalcat/refine/refine_geom.py +245 -0
- servalcat/refine/refine_spa.py +400 -0
- servalcat/refine/refine_xtal.py +339 -0
- servalcat/refine/spa.py +151 -0
- servalcat/refine/xtal.py +312 -0
- servalcat/refmac/__init__.py +0 -0
- servalcat/refmac/exte.py +191 -0
- servalcat/refmac/refmac_keywords.py +660 -0
- servalcat/refmac/refmac_wrapper.py +423 -0
- servalcat/spa/__init__.py +0 -0
- servalcat/spa/fofc.py +488 -0
- servalcat/spa/fsc.py +391 -0
- servalcat/spa/localcc.py +197 -0
- servalcat/spa/realspcc_from_var.py +128 -0
- servalcat/spa/run_refmac.py +979 -0
- servalcat/spa/shift_maps.py +293 -0
- servalcat/spa/shiftback.py +137 -0
- servalcat/spa/translate.py +129 -0
- servalcat/utils/__init__.py +35 -0
- servalcat/utils/commands.py +1629 -0
- servalcat/utils/fileio.py +836 -0
- servalcat/utils/generate_operators.py +296 -0
- servalcat/utils/hkl.py +811 -0
- servalcat/utils/logger.py +140 -0
- servalcat/utils/maps.py +345 -0
- servalcat/utils/model.py +933 -0
- servalcat/utils/refmac.py +759 -0
- servalcat/utils/restraints.py +888 -0
- servalcat/utils/symmetry.py +298 -0
- servalcat/xtal/__init__.py +0 -0
- servalcat/xtal/french_wilson.py +262 -0
- servalcat/xtal/run_refmac_small.py +240 -0
- servalcat/xtal/sigmaa.py +1954 -0
- servalcat/xtal/twin.py +316 -0
- servalcat-0.4.131.dist-info/METADATA +60 -0
- servalcat-0.4.131.dist-info/RECORD +45 -0
- servalcat-0.4.131.dist-info/WHEEL +6 -0
- servalcat-0.4.131.dist-info/entry_points.txt +4 -0
- servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
servalcat/xtal/sigmaa.py
ADDED
|
@@ -0,0 +1,1954 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
import argparse
|
|
10
|
+
import gemmi
|
|
11
|
+
import numpy
|
|
12
|
+
import pandas
|
|
13
|
+
import itertools
|
|
14
|
+
import time
|
|
15
|
+
import scipy.special
|
|
16
|
+
import scipy.optimize
|
|
17
|
+
from servalcat.utils import logger
|
|
18
|
+
from servalcat import utils
|
|
19
|
+
from servalcat import ext
|
|
20
|
+
from servalcat.xtal.twin import find_twin_domains_from_data, estimate_twin_fractions_from_model, mlopt_twin_fractions
|
|
21
|
+
|
|
22
|
+
"""
|
|
23
|
+
DFc = sum_j D_j F_c,j
|
|
24
|
+
The last Fc,n is bulk solvent contribution.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
# Module-level integrator shared by the intensity-based (MLI) likelihood
# functions below; wraps the compiled numerical-integration routines in ext.
integr = ext.IntensityIntegrator()
|
|
28
|
+
|
|
29
|
+
def add_arguments(parser):
    """Register the sigmaa command-line options on *parser*.

    Mutates *parser* in place; the registration order here defines the
    layout of the --help output, so do not reorder casually.
    """
    parser.description = 'Sigma-A parameter estimation for crystallographic data'
    # input reflection data and test-set flags
    parser.add_argument('--hklin', required=True,
                        help='Input MTZ file')
    parser.add_argument('--hklin_free',
                        help='Input MTZ file for test flags')
    parser.add_argument('--spacegroup',
                        help='Override space group')
    parser.add_argument('--labin',
                        help='MTZ columns of --hklin for F,SIGF,FREE')
    parser.add_argument('--labin_free',
                        help='MTZ column of --hklin_free')
    parser.add_argument('--free', type=int,
                        help='flag number for test set')
    # atomic model(s); nargs="+" with action="append" yields a list of lists
    parser.add_argument('--model', required=True, nargs="+", action="append",
                        help='Input atomic model file(s)')
    # resolution limits and binning
    parser.add_argument("-d", '--d_min', type=float)
    parser.add_argument('--d_max', type=float)
    parser.add_argument('--nbins', type=int,
                        help="Number of bins for statistics (default: auto)")
    parser.add_argument('--nbins_ml', type=int,
                        help="Number of bins for ML parameters (default: auto)")
    # scattering model
    parser.add_argument('-s', '--source', choices=["electron", "xray", "neutron"], required=True,
                        help="Scattering factor choice")
    parser.add_argument("--wavelength", type=float, help="For f_prime")
    # ML parameter estimation options (see VarTrans for the transforms)
    parser.add_argument('--D_trans', choices=["exp", "splus"],
                        help="estimate D with positivity constraint")
    parser.add_argument('--S_trans', choices=["exp", "splus"],
                        help="estimate variance of unexplained signal with positivity constraint")
    parser.add_argument('--no_solvent', action='store_true',
                        help="Do not consider bulk solvent contribution")
    parser.add_argument('--use_cc', action='store_true',
                        help="Use CC(|F1|,|F2|) to CC(F1,F2) conversion to derive D and S")
    parser.add_argument('--use', choices=["all", "work", "test"], default="all",
                        help="Which reflections to be used for the parameter estimate.")
    # twinning
    parser.add_argument('--twin', action="store_true", help="Turn on twin refinement")
    parser.add_argument('--twin_mlalpha', action="store_true", help="Use ML optimisation for twin fractions")
    # miscellaneous
    parser.add_argument('--mask',
                        help="A solvent mask (by default calculated from the coordinates)")
    parser.add_argument('--keep_charges', action='store_true',
                        help="Use scattering factor for charged atoms. Use it with care.")
    parser.add_argument('-o','--output_prefix', default="sigmaa",
                        help='output file name prefix (default: %(default)s)')
# add_arguments()
|
|
73
|
+
|
|
74
|
+
def parse_args(arg_list):
    """Build the sigmaa argument parser and parse *arg_list* with it."""
    cli_parser = argparse.ArgumentParser()
    add_arguments(cli_parser)
    parsed = cli_parser.parse_args(arg_list)
    return parsed
# parse_args()
|
|
79
|
+
|
|
80
|
+
def nanaverage(cc, w):
    """Weighted mean of *cc* ignoring NaN entries.

    Returns NaN when the weight available for the non-NaN entries sums
    to zero (including the all-NaN case).
    """
    valid = ~numpy.isnan(cc)
    usable_weight = numpy.sum(w[valid])
    if usable_weight == 0:
        return numpy.nan
    return numpy.average(cc[valid], weights=w[valid])
|
|
85
|
+
|
|
86
|
+
def calc_r_and_cc(hkldata, twin_data=None):
    """Compute per-bin and overall R factors and correlation coefficients.

    Works for either intensity data (columns I/SIGI -> R1/CCI) or amplitude
    data (FP/SIGFP -> R/CCF), optionally split into work/free sets, split by
    log-likelihood weight (llweight), handling anomalous pairs and twinning.

    Returns (stats, ret): *stats* is a per-bin DataFrame (copy of
    hkldata.binned_df["stat"] with added columns), *ret* a dict of overall
    values keyed by e.g. "CCFworkavg", "R1free".
    """
    has_int = "I" in hkldata.df
    has_free = "FREE" in hkldata.df
    has_llw = (hkldata.df.llweight != 1.0).any()
    # anomalous treatment only without twinning and when both F(+/-) (or
    # I(+/-)) and the imaginary part FC'' are present
    has_ano = not twin_data and ("I(+)" if has_int else "F(+)") in hkldata.df and "FC''" in hkldata.df
    # column labels depend on whether we compare intensities or amplitudes
    rlab = "R1" if has_int else "R"
    cclab = "CCI" if has_int else "CCF"
    olab = "Io" if has_int else "Fo"
    clab = "Ic" if has_int else "Fc"
    stats = hkldata.binned_df["stat"].copy()
    stats[[f"Mn({olab})", f"Mn({clab})"]] = numpy.nan
    stats[["n_obs", "n_all"]] = 0
    if has_free:
        stats[["n_work", "n_free"]] = 0
    if has_llw:
        for suf in ("_llw=0", "_llw>0"):
            stats["n"+suf] = 0
    if rlab == "R1":
        # R1 uses only reflections with I/sigma >= 2, so its reflection
        # counts differ from n_obs and are tracked separately
        if has_free:
            for suf in ("work", "free"):
                stats["n_R1"+suf] = 0
        else:
            stats["n_R1"] = 0
        if has_llw:
            for suf in ("_llw=0", "_llw>0"):
                stats["n_R1"+suf] = 0
    stats["Cmpl"] = 0.
    # calculated amplitudes, anisotropy-scaled
    if twin_data:
        Fc = numpy.sqrt(twin_data.i_calc_twin()) * hkldata.df.k_aniso.to_numpy()
    elif has_ano:
        # F+ and F- differ by the sign of the anomalous imaginary part FC''
        fcpp = hkldata.df["FC''"].to_numpy()[:,None] * numpy.array([1j, -1j])
        Fc = numpy.abs(hkldata.df.FC.to_numpy()[:,None] + fcpp) * hkldata.df.k_aniso.to_numpy()[:,None]
    else:
        Fc = numpy.abs(hkldata.df.FC.to_numpy() * hkldata.df.k_aniso.to_numpy())
    if has_int:
        if has_ano:
            obs = hkldata.df[["I(+)", "I(-)"]].to_numpy()
            sigma = hkldata.df[["SIGI(+)", "SIGI(-)"]].to_numpy()
        else:
            obs = hkldata.df.I.to_numpy()
            sigma = hkldata.df.SIGI.to_numpy()
        obs_sqrt = numpy.sqrt(numpy.maximum(0, obs))
        obs_sqrt[obs / sigma < 2] = numpy.nan # SHELX equivalent
        calc = Fc**2
        calc_sqrt = Fc
    else:
        if has_ano:
            # NOTE(review): obs_sqrt is not assigned on this branch; the
            # later r_factor(obs_sqrt, ...) relies on the non-anomalous
            # assignment — confirm anomalous amplitude input is supported.
            obs = hkldata.df[["F(+)", "F(-)"]].to_numpy()
        else:
            obs = obs_sqrt = hkldata.df.FP.to_numpy()
        calc = calc_sqrt = Fc
    if "CC*" in stats: # swap the positions
        stats.insert(len(stats.columns)-1, "CC*", stats.pop("CC*"))
    if has_free:
        for lab in (cclab, rlab):
            for suf in ("work", "free"):
                stats[lab+suf] = numpy.nan
    else:
        stats[cclab] = numpy.nan
        stats[rlab] = numpy.nan
    if has_llw:
        for lab in (cclab, rlab):
            for suf in ("_llw=0", "_llw>0"):
                stats[lab+suf] = numpy.nan

    centric_and_selections = hkldata.centric_and_selections["stat"]
    # llweight partitions: excluded (0) vs contributing (>0) reflections
    sel_llw = [hkldata.df.llweight == 0, hkldata.df.llweight > 0]
    for i_bin, idxes in hkldata.binned("stat"):
        stats.loc[i_bin, "n_obs"] = numpy.sum(numpy.isfinite(obs[idxes]))
        if has_ano:
            # acentric reflections contribute two observations (+/-)
            stats.loc[i_bin, "n_all"] = sum((len(work) + len(test)) * (1 if c == 1 else 2)
                                            for c, work, test in centric_and_selections[i_bin])
        else:
            stats.loc[i_bin, "n_all"] = len(idxes)
        stats.loc[i_bin, "Cmpl"] = stats.loc[i_bin, "n_obs"] / stats.loc[i_bin, "n_all"] * 100.
        stats.loc[i_bin, f"Mn({olab})"] = numpy.nanmean(obs[idxes])
        stats.loc[i_bin, f"Mn({clab})"] = numpy.nanmean(calc[idxes])
        if has_free:
            # centric_and_selections entries are (centric_flag, work, test)
            for j, suf in ((1, "work"), (2, "free")):
                idxes2 = numpy.concatenate([sel[j] for sel in centric_and_selections[i_bin]])
                stats.loc[i_bin, "n_"+suf] = numpy.sum(numpy.isfinite(obs[idxes2]))
                stats.loc[i_bin, cclab+suf] = utils.hkl.correlation(obs[idxes2], calc[idxes2])
                stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes2], calc_sqrt[idxes2])
                if rlab == "R1":
                    stats.loc[i_bin, "n_"+rlab+suf] = numpy.sum(numpy.isfinite(obs_sqrt[idxes2]))
        else:
            stats.loc[i_bin, cclab] = utils.hkl.correlation(obs[idxes], calc[idxes])
            stats.loc[i_bin, rlab] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
            if rlab == "R1":
                stats.loc[i_bin, "n_"+rlab] = numpy.sum(numpy.isfinite(obs_sqrt[idxes]))
        if has_llw:
            for j, suf in enumerate(("_llw=0", "_llw>0")):
                sel = sel_llw[j][idxes]
                stats.loc[i_bin, "n"+suf] = numpy.sum(numpy.isfinite(obs[idxes][sel]))
                stats.loc[i_bin, cclab+suf] = utils.hkl.correlation(obs[idxes][sel], calc[idxes][sel])
                stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes][sel], calc_sqrt[idxes][sel])
                if rlab == "R1":
                    stats.loc[i_bin, "n_"+rlab+suf] = numpy.sum(numpy.isfinite(obs_sqrt[idxes][sel]))
    # Overall
    ret = {}
    if has_free:
        # CC averages are weighted by per-bin counts; R factors are
        # recomputed over all reflections of the set, not averaged
        for suf in ("work", "free"):
            ret[cclab+suf+"avg"] = nanaverage(stats[cclab+suf], stats["n_"+suf])
        for j, suf in ((1, "work"), (2, "free")):
            idxes = numpy.concatenate([sel[j] for i_bin, _ in hkldata.binned("stat") for sel in centric_and_selections[i_bin]])
            ret[rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
    else:
        ret[cclab+"avg"] = nanaverage(stats[cclab], stats["n_obs"])
        ret[rlab] = utils.hkl.r_factor(obs_sqrt, calc_sqrt)
    if has_llw:
        for j, suf in enumerate(("_llw=0", "_llw>0")):
            ret[cclab+suf+"_avg"] = nanaverage(stats[cclab+suf], stats["n"+suf])
            sel = sel_llw[j]
            ret[rlab+suf] = utils.hkl.r_factor(obs_sqrt[sel], calc_sqrt[sel])

    return stats, ret
# calc_r_and_cc()
|
|
203
|
+
|
|
204
|
+
def subtract_common_aniso_from_model(sts):
    """Remove the mean trace-free anisotropic ADP component shared by all models.

    The average anisotropic (trace-subtracted) U tensor over all atoms with
    aniso ADPs is projected onto the space-group-allowed directions, then
    subtracted in place from every aniso atom of every structure in *sts*.

    Returns the subtracted component as a B-value tensor (gemmi.SMat33f),
    or a zero tensor when there is nothing to subtract.
    """
    # symmetry-allowed ADP directions (trace-free: tr0=True)
    adpdirs = utils.model.adp_constraints(sts[0].find_spacegroup().operations(), sts[0].cell, tr0=True)
    # collect trace-free aniso tensors (PDB element order) of all aniso atoms
    aniso_all = [cra.atom.aniso.added_kI(-cra.atom.aniso.trace()/3).elements_pdb() for st in sts for cra in st[0].all() if cra.atom.aniso.nonzero()]
    if not aniso_all: # no atoms with aniso ADP
        return gemmi.SMat33f(0,0,0,0,0,0)

    aniso_mean = numpy.mean(aniso_all, axis=0)
    # project the mean onto the symmetry-allowed subspace
    aniso_mean = adpdirs.dot(aniso_mean).dot(adpdirs)

    if not numpy.any(aniso_mean):
        return gemmi.SMat33f(0,0,0,0,0,0)

    # correct atoms
    smat_sub = gemmi.SMat33f(*aniso_mean)
    for st in sts:
        for cra in st[0].all():
            if cra.atom.aniso.nonzero():
                cra.atom.aniso -= smat_sub

    # convert the subtracted U tensor to B for reporting/return
    b_aniso = smat_sub.scaled(utils.model.u_to_b)
    logger.writeln(f"Subtracting common anisotropic component from model: B= {b_aniso}")
    return b_aniso
# subtract_common_aniso_from_model()
|
|
227
|
+
|
|
228
|
+
class VarTrans:
    """Variable transforms imposing (optional) positivity on D and S.

    For each of D and S, holds a triple of callables:
    the forward transform (parameter = f(x)), its derivative df/dx,
    and the inverse (x = f^-1(parameter)).
    """
    def __init__(self, D_trans, S_trans):
        # softplus tends to behave better than exp; exp can drive the
        # transformed parameter to very large values
        identity = (lambda x: x,   # D = f(x)
                    lambda x: 1,   # dD/dx
                    lambda x: x)   # x = f^-1(D)
        exponential = (numpy.exp, numpy.exp, numpy.log)
        softplus = (lambda x: numpy.logaddexp(0, x),
                    scipy.special.expit,  # sigmoid, i.e. 1/(1+exp(-x))
                    lambda x: x + numpy.log(-numpy.expm1(-x)))
        table = {"exp": exponential, "splus": softplus, None: identity}
        self.D, self.D_deriv, self.D_inv = table[D_trans]
        self.S, self.S_deriv, self.S_inv = table[S_trans]
# class VarTrans
|
|
245
|
+
|
|
246
|
+
class LsqScale:
    """Least-squares scaling of calculated to observed structure factors.

    Model: obs ~ k_overall * exp(-B_aniso s^2 terms) * |Fc + k_sol exp(-B_sol s^2/4) Fmask|
    (the solvent term only when two Fc arrays are given).
    """
    # parameter x = [k_overall, adp_pars, k_sol, B_sol]
    def __init__(self, k_as_exp=False, func_type="log_cosh"):
        # k_as_exp: refine log(k) instead of k itself (keeps k positive)
        # func_type: residual function, plain squares or robust log-cosh
        assert func_type in ("sq", "log_cosh")
        self.k_trans = lambda x: numpy.exp(x) if k_as_exp else x
        self.k_trans_der = lambda x: numpy.exp(x) if k_as_exp else 1
        self.k_trans_inv = lambda x: numpy.log(x) if k_as_exp else x
        self.func_type = func_type
        self.reset()

    def reset(self):
        """Reset scaling state to defaults (before any data is set)."""
        self.k_sol = 0.35 # same default as gemmi/scaling.hpp # refmac seems to use 0.33 and 100? SCALE_LS_PART
        self.b_sol = 46.
        self.k_overall = None
        self.b_iso = None
        self.b_aniso = None
        self.stats = {}

    def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None, twin_data=None):
        """Attach observed data and calculated F arrays.

        fc_list: [Fc] or [Fc, Fmask]; with two entries the bulk solvent
        contribution is refined.  use_int selects intensity-based scaling.
        sigma_cutoff, if given, masks weak observations (set to NaN).
        """
        assert 0 < len(fc_list) < 3
        self.use_int = use_int
        if sigma_cutoff is not None:
            if use_int:
                self.sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
                self.labcut = "(I/SIGI>{})".format(sigma_cutoff)
            else:
                self.sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
                self.labcut = "(F/SIGF>{})".format(sigma_cutoff)
        else:
            # NOTE(review): without a cutoff, sel is the full index and the
            # ~self.sel below relies on its boolean interpretation — confirm
            # this path is exercised with an all-True mask upstream.
            self.sel = hkldata.df.index
            self.labcut = ""
        self.obs = hkldata.df["I" if use_int else "FP"].to_numpy(copy=True)
        self.obs[~self.sel] = numpy.nan
        self.calc = [x for x in fc_list]
        self.s2mat = hkldata.ssq_mat()
        self.s2 = 1. / hkldata.d_spacings().to_numpy()**2
        # symmetry constraints on the anisotropic scale tensor
        self.adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=False)
        self.twin_data = twin_data
        if use_int:
            # intensities are matched through their square roots in target()
            self.sqrt_obs = numpy.sqrt(self.obs)

    def get_solvent_scale(self, k_sol, b_sol, s2=None):
        """Bulk solvent scale factor k_sol * exp(-B_sol s^2 / 4)."""
        if s2 is None: s2 = self.s2
        return k_sol * numpy.exp(-b_sol * s2 / 4)

    def fc_and_mask_grad(self, x):
        """Return (|Ftotal|, d|F|/dk_sol, d|F|/dB_sol).

        The two gradient entries are None when no solvent term is refined.
        x[-2], x[-1] are k_sol, B_sol when present.
        """
        fc0 = self.calc[0]
        if len(self.calc) == 2:
            if self.twin_data:
                # compiled helper returns the three columns at once
                r = self.twin_data.scaling_fc_and_mask_grad(self.calc[1], x[-2], x[-1])
                return r[:,0], r[:,1], r[:,2]
            else:
                fmask = self.calc[1]
                temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
                fbulk = x[-2] * temp_sol * fmask
                fc = fc0 + fbulk
                # d|Fc+Fbulk|/dk_sol via Re(Fmask * conj(Ftotal)) / |Ftotal|
                re_fmask_fcconj = (fmask * fc.conj()).real
                fc_abs = numpy.abs(fc)
                tmp = temp_sol / fc_abs * re_fmask_fcconj
                return fc_abs, tmp, -tmp * x[-2] * self.s2 / 4
        else:
            if self.twin_data:
                return numpy.sqrt(self.twin_data.i_calc_twin()), None, None
            else:
                return numpy.abs(fc0), None, None

    def scaled_fc(self, x):
        """Fully scaled |Fc|: overall scale * anisotropic factor * |Ftotal|."""
        fc = self.fc_and_mask_grad(x)[0]
        nadp = self.adpdirs.shape[0]
        B = numpy.dot(x[1:nadp+1], self.adpdirs)
        kani = numpy.exp(numpy.dot(-B, self.s2mat))
        return self.k_trans(x[0]) * kani * fc

    def target(self, x):
        """Scaling residual (NaN-safe sum over reflections)."""
        y = self.scaled_fc(x)
        if self.use_int:
            # compare sqrt(I) with scaled |Fc|
            diff = self.sqrt_obs - y
            #y2 = y**2
            #diff = self.obs - y2
        else:
            diff = self.obs - y

        if self.func_type == "sq":
            return numpy.nansum(diff**2)
        elif self.func_type == "log_cosh":
            # robust loss: ~diff^2/2 near zero, ~|diff| for outliers
            return numpy.nansum(gemmi.log_cosh(diff))
        else:
            raise RuntimeError("bad func_type")

    def grad(self, x):
        """Analytical gradient of target() with respect to x."""
        g = numpy.zeros_like(x)
        fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
        nadp = self.adpdirs.shape[0]
        B = numpy.dot(x[1:nadp+1], self.adpdirs)
        kani = numpy.exp(numpy.dot(-B, self.s2mat))
        k = self.k_trans(x[0])
        y = k * kani * fc_abs
        if self.use_int:
            diff = self.sqrt_obs - y
            diff_der = -1
            #diff = self.obs - y**2
            #diff_der = -2 * y
        else:
            diff = self.obs - y
            diff_der = -1
        # dfdy = d(loss)/dy, chain-ruled into each parameter below
        if self.func_type == "sq":
            dfdy = 2 * diff * diff_der
        elif self.func_type == "log_cosh":
            dfdy = numpy.tanh(diff) * diff_der
        else:
            raise RuntimeError("bad func_type")

        dfdb = numpy.nansum(-self.s2mat * k * fc_abs * kani * dfdy, axis=1)
        g[0] = numpy.nansum(kani * fc_abs * dfdy * self.k_trans_der(x[0]))
        g[1:nadp+1] = numpy.dot(dfdb, self.adpdirs.T)
        if len(self.calc) == 2:
            g[-2] = numpy.nansum(k * kani * der_ksol * dfdy)
            g[-1] = numpy.nansum(k * kani * der_bsol * dfdy)

        return g

    def calc_shift(self, x):
        """Gauss-Newton-style shift -g H^-1 using per-reflection Jacobians."""
        # TODO: sort out code duplication, if we use this.
        g = numpy.zeros((len(self.obs), len(x)))
        H = numpy.zeros((len(x), len(x)))
        fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
        nadp = self.adpdirs.shape[0]
        B = numpy.dot(x[1:nadp+1], self.adpdirs)
        kani = numpy.exp(numpy.dot(-B, self.s2mat))
        k = self.k_trans(x[0])
        y = k * kani * fc_abs
        if self.use_int:
            diff = self.sqrt_obs - y
            diff_der = -1
            diff_der2 = 0
        else:
            diff = self.obs - y
            diff_der = -1.
            diff_der2 = 0.

        if self.func_type == "sq":
            dfdy = 2 * diff * diff_der
            dfdy2 = 2 * diff_der**2 + 2 * diff * diff_der2
        elif self.func_type == "log_cosh":
            dfdy = numpy.tanh(diff) * diff_der
            #dfdy2 = 1 /numpy.cosh(diff)**2 * diff_der**2 + numpy.tanh(diff) * diff_der2 # problematic with large diff
            #dfdy2 = numpy.where(diff==0, 1., numpy.abs(numpy.tanh(diff)) / gemmi.log_cosh(diff)) * diff_der**2 + numpy.tanh(diff) * diff_der2
            # stable surrogate for sech^2 that avoids overflow at large diff
            dfdy2 = numpy.where(diff==0, 1., numpy.tanh(diff) / diff) * diff_der**2 + numpy.tanh(diff) * diff_der2
        else:
            raise RuntimeError("bad func_type")

        # per-reflection dy/dparam rows
        dfdb = -self.s2mat * k * fc_abs * kani
        g[:,0] = kani * fc_abs * self.k_trans_der(x[0])
        g[:,1:nadp+1] = numpy.dot(dfdb.T, self.adpdirs.T)
        if len(self.calc) == 2:
            g[:,-2] = k * kani * der_ksol
            g[:,-1] = k * kani * der_bsol

        # no numpy.nandot..
        g, dfdy, dfdy2 = g[self.sel, :], dfdy[self.sel], dfdy2[self.sel]
        H = numpy.dot(g.T, g * dfdy2[:,None])
        g = numpy.sum(dfdy[:,None] * g, axis=0)
        dx = -numpy.dot(g, numpy.linalg.pinv(H))
        return dx

    def initial_kb(self):
        """Crude log-linear initial estimate of overall k and isotropic B."""
        fc_abs = self.fc_and_mask_grad([self.k_sol, self.b_sol])[0]
        sel = self.obs > 0 # exclude nan as well
        f1p, f2p, s2p = self.obs[sel], fc_abs[sel], self.s2[sel]
        if self.use_int: f2p *= f2p
        tmp = numpy.log(f2p) - numpy.log(f1p)
        # g = [dT/dk, dT/db]
        g = numpy.array([2 * numpy.sum(tmp), -numpy.sum(tmp*s2p)/2])
        H = numpy.zeros((2,2))
        H[0,0] = 2*len(f1p)
        H[1,1] = numpy.sum(s2p**2/8)
        H[0,1] = H[1,0] = -numpy.sum(s2p)/2
        x = -numpy.dot(numpy.linalg.inv(H), g)
        if self.use_int: x /= 2
        k = numpy.exp(x[0])
        b = x[1]
        logger.writeln(" initial k,b = {:.2e} {:.2e}".format(k, b))
        logger.writeln(" R{} = {:.4f}".format(self.labcut, utils.hkl.r_factor(f1p, f2p * k * numpy.exp(-b*self.s2[sel]/4))))
        return k, b

    def scale(self):
        """Refine all scale parameters; results stored on self (k_overall,
        b_iso, b_aniso, k_sol, b_sol) and in self.stats."""
        use_sol = len(self.calc) == 2
        msg = "Scaling Fc to {} {} bulk solvent contribution".format("Io" if self.use_int else "Fo",
                                                                    "with" if use_sol else "without")
        logger.writeln(msg)
        # warm start from a previous run when available
        if self.k_overall is None or self.b_iso is None:
            k, b = self.initial_kb()
        else:
            k, b = self.k_overall, self.b_iso
        if self.b_aniso is None:
            self.b_aniso = gemmi.SMat33d(b,b,b,0,0,0)
        x0 = [self.k_trans_inv(k)]
        bounds = [(0, None)]
        x0.extend(numpy.dot(self.b_aniso.elements_pdb(), self.adpdirs.T))
        bounds.extend([(None, None)]*(len(x0)-1))
        if use_sol:
            x0.extend([self.k_sol, self.b_sol])
            bounds.extend([(1e-4, None), (10., 400.)])
        if 0:
            # numerical derivative check (debug only; never runs)
            f0 = self.target(x0)
            ader = self.grad(x0)
            e = 1e-4
            nder = []
            for i in range(len(x0)):
                x = numpy.copy(x0)
                x[i] += e
                f1 = self.target(x)
                nder.append((f1 - f0) / e)
            print("ADER NDER RATIO")
            print(ader)
            print(nder)
            print(ader / nder)
            quit()

        t0 = time.time()
        if 1:
            # hand-rolled Gauss-Newton with step-halving line search
            x = x0
            for i in range(40):
                x_ini = x.copy()
                f0 = f1 = self.target(x)
                dx = self.calc_shift(x)
                if numpy.max(numpy.abs(dx)) < 1e-6:
                    break
                for s in (1, 0.5, 0.25):
                    if 0:
                        # 1D scan of the target along dx (debug only)
                        with open("debug.dat", "w") as ofs:
                            for s in numpy.linspace(-2, 2, 100):
                                f1 = self.target(x+dx * s)
                                #print(dx, f0, f1, f0 - f1)
                                ofs.write("{:4e} {:4e}\n".format(s, f1))
                    shift = dx * s
                    x = x_ini + shift
                    # clamp parameters back into their allowed ranges
                    if x[0] < 0: x[0] = x0[0]
                    if use_sol:
                        if x[-1] < 10: x[-1] = 10
                        elif x[-1] > 400: x[-1] = 400
                        if x[-2] < 1e-4: x[-2] = 1e-4
                    f1 = self.target(x)
                    if f1 < f0: break
                #logger.writeln("cycle {} {} {} {} {} {}".format(i, f0, f1, s, shift, (f0 - f1) / f0))
                if 0 < (f0 - f1) / f0 < 1e-6:
                    break
            res_x = x
            self.stats["fun"] = f1
            self.stats["x"] = x
        else:
            res = scipy.optimize.minimize(fun=self.target, x0=x0, jac=self.grad, bounds=bounds)
            #logger.writeln(str(res))
            logger.writeln(" finished in {} iterations ({} evaluations)".format(res.nit, res.nfev))
            res_x = res.x
            self.stats["fun"] = res.fun
            self.stats["x"] = res.x
        logger.writeln(" time: {:.3f} sec".format(time.time() - t0))
        self.k_overall = self.k_trans(res_x[0])
        nadp = self.adpdirs.shape[0]
        b_overall = gemmi.SMat33d(*numpy.dot(res_x[1:nadp+1], self.adpdirs))
        self.b_iso = b_overall.trace() / 3
        self.b_aniso = b_overall.added_kI(-self.b_iso) # subtract isotropic contribution

        logger.writeln(" k_ov= {:.2e} B_iso= {:.2e} B_aniso= {}".format(self.k_overall, self.b_iso, self.b_aniso))
        if use_sol:
            self.k_sol = res_x[-2]
            self.b_sol = res_x[-1]
            logger.writeln(" k_sol= {:.2e} B_sol= {:.2e}".format(self.k_sol, self.b_sol))
        calc = self.scaled_fc(res_x)
        if self.use_int: calc *= calc
        self.stats["cc"] = utils.hkl.correlation(self.obs, calc)
        self.stats["r"] = utils.hkl.r_factor(self.obs, calc)
        logger.writeln(" CC{} = {:.4f}".format(self.labcut, self.stats["cc"]))
        logger.writeln(" R{} = {:.4f}".format(self.labcut, self.stats["r"]))
# class LsqScale
|
|
522
|
+
|
|
523
|
+
def calc_abs_DFc(Ds, Fcs):
    """Return |sum_j D_j * Fc_j| for paired scale factors and F arrays.

    Ds and Fcs are matched sequences (one D per Fc contribution); the
    result is elementwise for array-valued Fc entries.
    """
    # zip replaces the index-based loop; behavior is identical for
    # matched-length inputs
    return numpy.abs(sum(d * fc for d, fc in zip(Ds, Fcs)))
# calc_abs_DFc()
|
|
527
|
+
|
|
528
|
+
#import line_profiler
|
|
529
|
+
#profile = line_profiler.LineProfiler()
|
|
530
|
+
#import atexit
|
|
531
|
+
#atexit.register(profile.print_stats)
|
|
532
|
+
#@profile
|
|
533
|
+
def mlf(df, fc_labs, Ds, S, k_ani, idxes):
    """Amplitude-based negative log-likelihood over the reflections in idxes.

    Ds scales each Fc contribution (columns fc_labs of df); S is the
    variance of the unexplained signal; k_ani the anisotropic scale.
    Centric flag is passed to ext.ll_amp as 1 (acentric) or 2 (centric).
    """
    Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    DFc = (Ds * Fcs).sum(axis=1)
    # single NaN-safe reduction of the per-reflection -LL values
    # (the original applied nansum twice; the second was a no-op on a scalar)
    return numpy.nansum(ext.ll_amp(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes],
                                   k_ani[idxes], S * df.epsilon.to_numpy()[idxes],
                                   numpy.abs(DFc), df.centric.to_numpy()[idxes]+1,
                                   df.llweight.to_numpy()[idxes]))
# mlf()
|
|
542
|
+
|
|
543
|
+
#@profile
|
|
544
|
+
def deriv_mlf_wrt_D_S(df, fc_labs, Ds, S, k_ani, idxes):
    """Gradient of the amplitude-based -LL with respect to (D_1..D_n, S).

    Returns a vector of length len(fc_labs)+1: D derivatives first,
    then the S derivative, each NaN-safe summed over idxes.
    """
    Fcs = [df[lab].to_numpy()[idxes] for lab in fc_labs]
    # per-reflection first derivatives from the compiled kernel;
    # columns are [dD_1 .. dD_n, dS]
    r = ext.ll_amp_der1_DS(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes], k_ani[idxes], S,
                           numpy.vstack(Fcs).T, Ds,
                           df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes],
                           df.llweight.to_numpy()[idxes])
    g = numpy.zeros(len(fc_labs)+1)
    g[:len(fc_labs)] = numpy.nansum(r[:,:len(fc_labs)], axis=0) # D
    g[-1] = numpy.nansum(r[:,-1]) # S
    return g
# deriv_mlf_wrt_D_S()
|
|
555
|
+
|
|
556
|
+
#@profile
|
|
557
|
+
def mlf_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
    """Quasi-Newton update step for S in the amplitude-based likelihood.

    Uses the first-derivative kernel and a Fisher-type approximation
    (mean of squared gradients) for the second derivative; returns -g/H.
    """
    Fcs = [df[lab].to_numpy()[idxes] for lab in fc_labs]
    r = ext.ll_amp_der1_DS(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes], k_ani[idxes], S,
                           numpy.vstack(Fcs).T, Ds,
                           df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes],
                           df.llweight.to_numpy()[idxes])
    # last column is the per-reflection dS derivative
    g = numpy.nansum(r[:,-1])
    H = numpy.nansum(r[:,-1]**2) # approximating expectation value of second derivative
    return -g / H
# mlf_shift_S()
|
|
567
|
+
|
|
568
|
+
def mli(df, fc_labs, Ds, S, k_ani, idxes):
    """Intensity-based negative log-likelihood over the reflections in idxes.

    Ds scales each Fc contribution (columns fc_labs of df); S is the
    unexplained-signal variance; k_ani the anisotropic scale factors.
    """
    fc_matrix = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    dfc_total = (Ds * fc_matrix).sum(axis=1)
    per_refl = integr.ll_int(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes],
                             k_ani[idxes], S * df.epsilon.to_numpy()[idxes],
                             numpy.abs(dfc_total), df.centric.to_numpy()[idxes]+1,
                             df.llweight.to_numpy()[idxes])
    return numpy.nansum(per_refl)
# mli()
|
|
577
|
+
|
|
578
|
+
def deriv_mli_wrt_D_S(df, fc_labs, Ds, S, k_ani, idxes):
    """Gradient of the intensity-based -LL with respect to (D_1..D_n, S).

    Returns a vector of length len(fc_labs)+1: D derivatives first,
    then the S derivative, each NaN-safe summed over idxes.
    """
    Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    # per-reflection first derivatives; columns are [dD_1 .. dD_n, dS]
    r = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
                              Fcs, Ds,
                              df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes],
                              df.llweight.to_numpy()[idxes])
    g = numpy.zeros(len(fc_labs)+1)
    g[:len(fc_labs)] = numpy.nansum(r[:,:len(fc_labs)], axis=0) # D
    g[-1] = numpy.nansum(r[:,-1]) # S
    return g
# deriv_mli_wrt_D_S()
|
|
589
|
+
|
|
590
|
+
def mli_shift_D(df, fc_labs, Ds, S, k_ani, idxes):
    """Quasi-Newton update step for the D parameters (intensity likelihood).

    Builds the gradient over the D columns and a Fisher-type Hessian
    approximation from outer products of per-reflection gradients;
    returns the shift vector -g H^+ (pseudo-inverse for safety).
    """
    Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    # keep only the D columns of the derivative kernel output
    r = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
                              Fcs, Ds,
                              df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes],
                              df.llweight.to_numpy()[idxes])[:,:len(fc_labs)]
    g = numpy.nansum(r, axis=0)# * trans.D_deriv(x[:len(fc_labs)]) # D
    #tmp = numpy.hstack([r[:,:len(fc_labs)] #* trans.D_deriv(x[:len(fc_labs)]),
    #                    r[:,-1,None] * trans.S_deriv(x[-1])])
    # sum of per-reflection outer products approximates the Hessian
    H = numpy.nansum(numpy.matmul(r[:,:,None], r[:,None]), axis=0)
    return -numpy.dot(g, numpy.linalg.pinv(H))
# mli_shift_D()
|
|
602
|
+
|
|
603
|
+
def mli_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
    """Quasi-Newton update step for S in the intensity-based likelihood.

    Uses the per-reflection first-derivative kernel; the second derivative
    is approximated Fisher-style by the sum of squared gradients.
    """
    fc_matrix = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    ders = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
                                 fc_matrix, Ds,
                                 df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes],
                                 df.llweight.to_numpy()[idxes])
    # last column holds the per-reflection dS derivative
    grad_S = numpy.nansum(ders[:,-1])
    hess_S = numpy.nansum(ders[:,-1]**2)  # approximate expected second derivative
    return -grad_S / hess_S
# mli_shift_S()
|
|
613
|
+
|
|
614
|
+
#debug_twin_count = 0
|
|
615
|
+
|
|
616
|
+
def mltwin_est_ftrue(twin_data, df, k_ani, idxes):
    """Run the detwinning "true F" estimation on anisotropy-corrected data.

    Builds Io and SIGIo arrays over the whole data frame (NaN outside
    idxes), divided by k_ani**2 (intensities scale with k**2), feeds them
    to twin_data.est_f_true (100 presumably an iteration limit — confirm
    against the ext implementation) and returns the corrected (Io, SIGIo).
    """
    kani2_inv = 1 / k_ani**2
    # row 0: intensities, row 1: sigmas; NaN where not selected
    i_sigi = numpy.empty((2, len(df.index)))
    i_sigi[:] = numpy.nan
    i_sigi[0, idxes] = (df.I.to_numpy() * kani2_inv)[idxes]
    i_sigi[1, idxes] = (df.SIGI.to_numpy() * kani2_inv)[idxes]
    #global debug_twin_count
    #debug_twin_count += 1
    # sed -i "s/,]/]/; s/nan/NaN/g" *json
    #twin_data.debug_open(f"twin_debug_{debug_twin_count}.json")
    twin_data.est_f_true(i_sigi[0,:], i_sigi[1,:], 100)
    #twin_data.debug_close()
    return i_sigi[0,:], i_sigi[1,:]
# mltwin_est_ftrue()
|
|
630
|
+
|
|
631
|
+
def mltwin(df, twin_data, Ds, S, k_ani, idxes, i_bin):
    """Twinned negative log-likelihood for bin i_bin at parameters (Ds, S)."""
    # push the trial bin parameters into the shared twin_data state
    twin_data.ml_sigma[i_bin] = S
    twin_data.ml_scale[i_bin, :] = Ds
    io_arr, sigio_arr = mltwin_est_ftrue(twin_data, df, k_ani, idxes)
    #print("-LL=", ret)
    return twin_data.ll(io_arr, sigio_arr)
# mltwin()
|
|
639
|
+
|
|
640
|
+
def deriv_mltwin_wrt_D_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
    """Gradient of the twinned ML target w.r.t. (D_0..D_k, S) for one bin.

    Writes the trial parameters into twin_data, re-estimates F_true, then
    sums the per-reflection derivative columns (NaN-safe). The last element
    of the returned vector is d/dS; the preceding ones are d/dD_j.
    """
    twin_data.ml_sigma[i_bin] = S
    twin_data.ml_scale[i_bin, :] = Ds
    Io, sigIo = mltwin_est_ftrue(twin_data, df, k_ani, idxes)
    # per-reflection derivatives; one column per parameter, S last
    r = twin_data.ll_der_D_S(Io, sigIo)
    g = numpy.zeros(r.shape[1])
    g[:-1] = numpy.nansum(r[:,:-1], axis=0) # D
    g[-1] = numpy.nansum(r[:,-1]) # S
    return g
# deriv_mltwin_wrt_D_S()
|
|
650
|
+
|
|
651
|
+
def mltwin_shift_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
    """Newton-like shift for S of one bin (twinned ML target).

    Same scheme as mli_shift_S: gradient over Fisher-like second-derivative
    approximation, both accumulated NaN-safely over reflections in the bin.
    """
    twin_data.ml_sigma[i_bin] = S
    twin_data.ml_scale[i_bin, :] = Ds
    Io, sigIo = mltwin_est_ftrue(twin_data, df, k_ani, idxes)
    r = twin_data.ll_der_D_S(Io, sigIo)
    g = numpy.nansum(r[:,-1])  # gradient w.r.t. S (last column)
    H = numpy.nansum(r[:,-1]**2) # approximating expectation value of second derivative
    return -g / H
# mltwin_shift_S()
|
|
660
|
+
|
|
661
|
+
def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, use="all", smoothing="gauss"):
    """Estimate sigma-A parameters (D per Fc component, S) per bin from
    correlation coefficients between |Fo| and |Fc|, then smooth them.

    use: which reflections enter the estimate ("all", "work" or "test").
    Writes the estimates into hkldata.binned_df["ml"] and per-reflection
    smoothed values into hkldata.df via smooth_params().
    """
    # theoretical CC(|F1|,|F2|) as a function of the underlying FSC,
    # for acentric (cc_a) and centric (cc_c) reflections
    cc_a = lambda cc: (numpy.pi/4*(1-cc**2)**2 * scipy.special.hyp2f1(3/2, 3/2, 1, cc**2) - numpy.pi/4) / (1-numpy.pi/4)
    cc_c = lambda cc: 2/(numpy.pi-2) * (cc**2*numpy.sqrt(1-cc**2) + cc * numpy.arctan(cc/numpy.sqrt(1-cc**2)) + (1-cc**2)**(3/2)-1)
    table_fsc = numpy.arange(0, 1, 1e-3)
    table_cc = [cc_a(table_fsc), cc_c(table_fsc)]

    for lab in D_labs: hkldata.binned_df["ml"][lab] = 1.
    hkldata.binned_df["ml"]["S"] = 1.

    # per-bin CC statistics table for logging
    stats = hkldata.binned_df["ml"][["d_max", "d_min"]].copy()
    for i, labi in enumerate(fc_labs):
        stats["CC(FP,{})".format(labi)] = numpy.nan
    for i, labi in enumerate(fc_labs):
        for j in range(i+1, len(fc_labs)):
            labj = fc_labs[j]
            stats["CC({},{})".format(labi, labj)] = numpy.nan

    centric_and_selections = hkldata.centric_and_selections["ml"]
    # sqrt of eps * c; c = 1 for acentrics and 2 for centrics
    inv_sqrt_c_eps = 1. / numpy.sqrt(hkldata.df.epsilon.to_numpy() * (hkldata.df.centric.to_numpy() + 1))
    for i_bin, _ in hkldata.binned("ml"):
        # assume they are all acentrics.. only correct by c
        if use == "all":
            cidxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
        else:
            i = 1 if use == "work" else 2
            cidxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
        # numpy.corrcoef has no NaN handling; drop missing FP up front
        valid_sel = numpy.isfinite(hkldata.df.FP.to_numpy()[cidxes])
        cidxes = cidxes[valid_sel]
        factor = inv_sqrt_c_eps[cidxes]
        k_ani = hkldata.df.k_aniso.to_numpy()[cidxes]
        Fo = hkldata.df.FP.to_numpy()[cidxes] * factor / k_ani
        mean_Fo2 = numpy.mean(Fo**2)
        SigFo = hkldata.df.SIGFP.to_numpy()[cidxes] / k_ani
        Fcs = [hkldata.df[lab].to_numpy()[cidxes] * factor for lab in fc_labs]
        mean_Fk2 = numpy.array([numpy.mean(numpy.abs(fk)**2) for fk in Fcs])

        # estimate D
        cc_fo_fj = [numpy.corrcoef(numpy.abs(fj), Fo)[1,0] for fj in Fcs]
        for i in range(len(fc_labs)): stats.loc[i_bin, "CC(FP,{})".format(fc_labs[i])] = cc_fo_fj[i]
        # inter-component correlation matrix of the Fc contributions
        A = numpy.array([[numpy.real(numpy.corrcoef(fk, fj)[1,0]) for fk in Fcs] for fj in Fcs])
        # convert observed CC values to estimated FSC via the acentric table
        v = numpy.interp(cc_fo_fj, table_cc[0], table_fsc)

        for i in range(len(fc_labs)):
            labi = fc_labs[i]
            for j in range(i+1, len(fc_labs)):
                labj = fc_labs[j]
                stats.loc[i_bin, "CC({},{})".format(labi, labj)] = numpy.real(numpy.corrcoef(Fcs[i], Fcs[j])[1,0])

        # test all signs, fixing first Fc positive.
        cc_max = -2
        for v_test in itertools.product(*((x, -x) for x in v[1:])):
            v_test = numpy.array((v[0],)+v_test)
            Dj_test = numpy.dot(numpy.linalg.pinv(A), v_test) * numpy.sqrt(mean_Fo2 / mean_Fk2)
            DFc_test = calc_abs_DFc(Dj_test, Fcs)
            cc_test = numpy.corrcoef(Fo, numpy.abs(DFc_test))[1,0]
            if cc_test > cc_max:
                cc_max = cc_test
                DFc = DFc_test
                Dj = Dj_test

        for lab, D in zip(D_labs, Dj):
            hkldata.binned_df["ml"].loc[i_bin, lab] = D

        # estimate S from <Fo^2> - 2<Fo><DFc>*fsc + <DFc^2> - <SigFo^2>
        mean_DFc2 = numpy.mean(DFc**2)
        est_fsc_fo_fc = numpy.interp(numpy.corrcoef(Fo, DFc)[1,0], table_cc[0], table_fsc)
        S = mean_Fo2 - 2 * numpy.sqrt(mean_Fo2 * mean_DFc2) * est_fsc_fo_fc + mean_DFc2 - numpy.mean(SigFo**2)
        hkldata.binned_df["ml"].loc[i_bin, "S"] = S

    logger.writeln("\nCC:")
    logger.writeln(stats.to_string())
    logger.writeln("\nEstimates:")
    logger.writeln(hkldata.binned_df["ml"].to_string())
    smooth_params(hkldata, D_labs, smoothing)
# determine_mlf_params_from_cc()
|
|
745
|
+
|
|
746
|
+
def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, use, twin_data=None):
    """Set initial per-bin D and S estimates in hkldata.binned_df["ml"].

    use_int: work from intensities (I) if True, otherwise amplitudes (FP).
    use: which reflections enter ("all", "work" or "test").
    Only the first D (D_labs[0]) gets a data-derived initial value; the rest
    stay at 1. Non-positive D estimates are replaced afterwards.
    """
    hkldata.binned_df["ml"]["n_ref"] = 0
    # Initial values
    for lab in D_labs: hkldata.binned_df["ml"][lab] = 1.
    hkldata.binned_df["ml"]["S"] = 10000.
    k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
    lab_obs = "I" if use_int else "FP"
    centric_and_selections = hkldata.centric_and_selections["ml"]
    for i_bin, _ in hkldata.binned("ml"):
        if use == "all":
            idxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
        else:
            i = 1 if use == "work" else 2
            idxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
        valid_sel = numpy.isfinite(hkldata.df.loc[idxes, lab_obs]) # as there is no nan-safe numpy.corrcoef
        valid_sel &= hkldata.df.llweight[idxes] > 0
        hkldata.binned_df["ml"].loc[i_bin, "n_ref"] = valid_sel.sum()
        if numpy.sum(valid_sel) < 2:
            continue
        idxes = idxes[valid_sel]
        # observed intensity (FP is squared to an intensity scale)
        if use_int:
            Io = hkldata.df.I.to_numpy()[idxes]
        else:
            Io = hkldata.df.FP.to_numpy()[idxes]**2
        Io /= k_ani[idxes]**2
        if twin_data:
            Ic = twin_data.i_calc_twin()[idxes]
        else:
            Ic = numpy.abs(hkldata.df.FC.to_numpy()[idxes])**2
        mean_Io = numpy.mean(Io)
        mean_Ic = numpy.mean(Ic)
        cc = numpy.corrcoef(Io, Ic)[1,0]
        # D ~ sqrt(<Io>/<Ic> * CC(Io,Ic)) when meaningful
        if cc > 0 and mean_Io > 0:
            D = numpy.sqrt(mean_Io / mean_Ic * cc)
        else:
            D = 0 # will be taken care later
        hkldata.binned_df["ml"].loc[i_bin, D_labs[0]] = D
        if mean_Io > 0:
            S = mean_Io - 2 * numpy.sqrt(mean_Io * mean_Ic * numpy.maximum(0, cc)) + mean_Ic
        else:
            S = numpy.std(Io) # similar initial to french_wilson
        hkldata.binned_df["ml"].loc[i_bin, "S"] = S

    # replace non-positive D estimates by a fraction of the minimum positive one
    for D_lab in D_labs:
        if hkldata.binned_df["ml"][D_lab].min() <= 0:
            min_D = hkldata.binned_df["ml"][D_lab][hkldata.binned_df["ml"][D_lab] > 0].min() * 0.1
            logger.writeln("WARNING: negative {} is detected from initial estimates. Replacing it using minimum positive value {:.2e}".format(D_lab, min_D))
            hkldata.binned_df["ml"].loc[hkldata.binned_df["ml"][D_lab] <= 0, D_lab] = min_D # arbitrary

    # mirror the estimates into the twin model so both stay in sync
    if twin_data:
        twin_data.ml_scale[:] = hkldata.binned_df["ml"].loc[:, D_labs]
        twin_data.ml_sigma[:] = hkldata.binned_df["ml"].loc[:, "S"]

    logger.writeln("Initial estimates:")
    logger.writeln(hkldata.binned_df["ml"].to_string())
# initialize_ml_params()
|
|
802
|
+
|
|
803
|
+
def refine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso,
                     D_trans=None, S_trans=None, use="all", n_cycle=1, twin_data=None):
    """Refine per-bin sigma-A parameters (D, S) and, for intensities, B_aniso.

    Per bin, D and S are refined alternately (up to 10 mini-cycles): D with
    scipy.optimize.minimize on the ML target, S with a damped Newton-like
    shift. D_trans/S_trans select the variable transform used by VarTrans
    (they also change the lower bounds used for the parameters).
    Returns the (possibly updated) b_aniso. Several `if 0:` branches are
    disabled debug/alternative code paths kept for reference.
    """
    trans = VarTrans(D_trans, S_trans)
    lab_obs = "I" if use_int else "FP"
    centric_and_selections = hkldata.centric_and_selections["ml"]
    def get_idxes(i_bin):
        # reflection indices of the bin, restricted by `use`
        if use == "all":
            return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
        else:
            i = 1 if use == "work" else 2
            return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])

    # refpar selects which parameters target()/grad() see in x:
    # "all" = D..D,S; "D" = D only; "S" = S only (closure over this variable)
    refpar = "all"
    for i_cyc in range(n_cycle):
        t0 = time.time()
        nfev_total = 0
        k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
        for i_bin, _ in hkldata.binned("ml"):
            idxes = get_idxes(i_bin)
            valid_sel = numpy.isfinite(hkldata.df.loc[idxes, lab_obs]) # as there is no nan-safe numpy.corrcoef
            if numpy.sum(valid_sel) < 5:
                logger.writeln("WARNING: bin {} has no sufficient reflections".format(i_bin))
                continue

            def target(x):
                # -log likelihood for the current bin given transformed params x
                if refpar == "all":
                    Ds = trans.D(x[:len(fc_labs)])
                    S = trans.S(x[-1])
                elif refpar == "D":
                    Ds = trans.D(x[:len(fc_labs)])
                    S = hkldata.binned_df["ml"].loc[i_bin, "S"]
                else:
                    Ds = [hkldata.binned_df["ml"].loc[i_bin, lab] for lab in D_labs]
                    S = trans.S(x[-1])

                if twin_data:
                    return mltwin(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
                else:
                    f = mli if use_int else mlf
                    return f(hkldata.df, fc_labs, Ds, S, k_ani, idxes)

            def grad(x):
                # gradient of target() w.r.t. x (chain rule through the transform)
                if refpar == "all":
                    Ds = trans.D(x[:len(fc_labs)])
                    S = trans.S(x[-1])
                    n_par = len(fc_labs)+1
                elif refpar == "D":
                    Ds = trans.D(x[:len(fc_labs)])
                    S = hkldata.binned_df["ml"].loc[i_bin, "S"]
                    n_par = len(fc_labs)
                else:
                    Ds = [hkldata.binned_df["ml"].loc[i_bin, lab] for lab in D_labs]
                    S = trans.S(x[-1])
                    n_par = 1
                if twin_data:
                    r = deriv_mltwin_wrt_D_S(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
                else:
                    calc_deriv = deriv_mli_wrt_D_S if use_int else deriv_mlf_wrt_D_S
                    r = calc_deriv(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
                g = numpy.zeros(n_par)
                if refpar in ("all", "D"):
                    g[:len(fc_labs)] = r[:len(fc_labs)]
                    g[:len(fc_labs)] *= trans.D_deriv(x[:len(fc_labs)])
                if refpar in ("all", "S"):
                    g[-1] = r[-1]
                    g[-1] *= trans.S_deriv(x[-1])
                return g

            if 0:  # disabled: dump S line scan to file for debugging
                refpar = "S"
                x0 = trans.S_inv(hkldata.binned_df["ml"].loc[i_bin, "S"])
                with open("s_line_{}.dat".format(i_bin), "w") as ofs:
                    for sval in numpy.linspace(1, x0*2, 100):
                        ofs.write("{:.4e} {:.10e} {:.10e}\n".format(sval,
                                                                    target([sval]),
                                                                    grad([sval])[0]))
                continue
            #print("Bin", i_bin)
            if 1: # refine D and S iteratively
                vals_last = None
                for ids in range(10):
                    # --- D step ---
                    refpar = "D"
                    x0 = numpy.array([trans.D_inv(hkldata.binned_df["ml"].loc[i_bin, lab]) for lab in D_labs])
                    #print("MLTWIN=", target(x0))
                    #quit()
                    if 0 and ids == 0: # debug
                        x = x0.copy()
                        with open(f"debug_d_{i_bin}.dat", "w") as ofs:
                            ofs.write("x f der1 shift\n")
                            for d in numpy.linspace(0.001, 1.2, 100):
                                x[0] = d
                                #shift = mli_shift_D(hkldata.df, fc_labs, trans.D(x), hkldata.binned_df.loc[i_bin, "S"], k_ani, idxes)[0]
                                shift = numpy.nan
                                der1 = grad(x)[0]
                                f = target(x)
                                ofs.write(f"{d} {f} {der1} {shift}\n")
                    if 0:  # disabled: numerical check of the analytical D gradient
                        h = 1e-3
                        f00 = target(x0)
                        g00 = grad(x0)
                        for ii in range(len(x0)):
                            xx = x0.copy()
                            xx[ii] += h
                            f01 = target(xx)
                            nder = (f01 - f00) / h
                            logger.writeln(f"DEBUG_der_D bin_{i_bin} {ii} ad={g00[ii]} nd={nder} r={g00[ii]/nder}")
                    vals_now = []
                    if 0:  # disabled: damped-Newton D update instead of scipy minimize
                        f0 = target(x0)
                        nfev_total += 1
                        shift = mli_shift_D(hkldata.df, fc_labs, trans.D(x0), hkldata.binned_df["ml"].loc[i_bin, "S"], k_ani, idxes)
                        shift /= trans.D_deriv(x0)
                        #if abs(shift) < 1e-3: break
                        for itry in range(10):
                            x1 = x0 + shift
                            if (D_trans and any(x1 < -3)) or (not D_trans and any(x1 < 5e-2)):
                                #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift, "BAD")
                                shift /= 2
                                continue
                            f1 = target(x1)
                            nfev_total += 1
                            if f1 > f0:
                                shift /= 2
                                continue
                            else: # good
                                for i, lab in enumerate(D_labs):
                                    hkldata.binned_df["ml"].loc[i_bin, lab] = trans.D(x1[i])
                                    vals_now.append(hkldata.binned_df["ml"].loc[i_bin, lab])
                                break
                        else:
                            break
                    else:
                        #print(mli_shift_D(hkldata.df, fc_labs, trans.D(x0), hkldata.binned_df["ml"].S[i_bin], k_ani, idxes))
                        res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
                                                      bounds=((-5 if D_trans else 1e-5, None),)*len(x0))
                        nfev_total += res.nfev
                        #print(i_bin, "mini cycle", ids, refpar)
                        #print(res)
                        for i, lab in enumerate(D_labs):
                            hkldata.binned_df["ml"].loc[i_bin, lab] = trans.D(res.x[i])
                            vals_now.append(hkldata.binned_df["ml"].loc[i_bin, lab])
                        if twin_data:
                            twin_data.ml_scale[i_bin, :] = trans.D(res.x)
                    # --- S step ---
                    refpar = "S"
                    if 1:
                        for cyc_s in range(1):
                            x0 = trans.S_inv(hkldata.binned_df["ml"].loc[i_bin, "S"])
                            if 0:  # disabled: numerical check of the analytical S gradient
                                h = 1e-1
                                f00 = target([x0])
                                g00 = grad([x0])
                                xx = x0 + h
                                f01 = target([xx])
                                nder = (f01 - f00) / h
                                logger.writeln(f"DEBUG_der_S bin_{i_bin} ad={g00} nd={nder} r={g00/nder}")

                            f0 = target([x0])
                            Ds = [hkldata.binned_df["ml"].loc[i_bin, lab] for lab in D_labs]
                            nfev_total += 1
                            if 0 and ids == 0: # debug
                                x = x0.copy()
                                with open(f"debug_s_{i_bin}.dat", "w") as ofs:
                                    ofs.write("x f der1 shift\n")
                                    for s in numpy.linspace(-5,5,100):
                                        x = x0 * 10**s
                                        if twin_data:
                                            shift = mltwin_shift_S(hkldata.df, twin_data, Ds, trans.S(x), k_ani, idxes, i_bin)
                                        else:
                                            calc_shift_S = mli_shift_S if use_int else mlf_shift_S
                                            shift = calc_shift_S(hkldata.df, fc_labs, Ds, trans.S(x), k_ani, idxes)
                                        der1 = grad([x])[0]
                                        f = target([x])
                                        ofs.write(f"{x} {f} {der1} {shift}\n")
                            # Newton-like shift for S, damped by halving until f decreases
                            if twin_data:
                                shift = mltwin_shift_S(hkldata.df, twin_data, Ds, trans.S(x0), k_ani, idxes, i_bin)
                            else:
                                calc_shift_S = mli_shift_S if use_int else mlf_shift_S
                                shift = calc_shift_S(hkldata.df, fc_labs, Ds, trans.S(x0), k_ani, idxes)
                            shift /= trans.S_deriv(x0)
                            if abs(shift) < 1e-3: break
                            for itry in range(10):
                                x1 = x0 + shift
                                if (S_trans and x1 < -3) or (not S_trans and x1 < 5e-2):
                                    #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift, "BAD")
                                    shift /= 2
                                    continue
                                f1 = target([x1])
                                nfev_total += 1
                                if f1 > f0:
                                    shift /= 2
                                    continue
                                else: # good
                                    #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift)
                                    hkldata.binned_df["ml"].loc[i_bin, "S"] = trans.S(x1)
                                    break
                            else:
                                #print("all bad")
                                break
                        if twin_data:
                            twin_data.ml_sigma[i_bin] = hkldata.binned_df["ml"].loc[i_bin, "S"]
                    else:
                        # somehow this does not work well.
                        x0 = [trans.S_inv(hkldata.binned_df["ml"].loc[i_bin, "S"])]
                        res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
                                                      bounds=((-3 if S_trans else 5e-2, None),))
                        nfev_total += res.nfev
                        #print(i_bin, "mini cycle", ids, refpar)
                        #print(res)
                        hkldata.binned_df["ml"].loc[i_bin, "S"] = trans.S(res.x[-1])
                        if twin_data:
                            twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
                    vals_now.append(hkldata.binned_df["ml"].loc[i_bin, "S"])
                    vals_now = numpy.array(vals_now)
                    # stop the mini-cycles once all parameters change by < 1%
                    if vals_last is not None and numpy.all(numpy.abs((vals_last - vals_now) / vals_now) < 1e-2):
                        #logger.writeln("converged in mini cycle {}".format(ids+1))
                        break
                    vals_last = vals_now
            else:
                # alternative: joint minimization of D and S in one call
                x0 = [trans.D_inv(hkldata.binned_df["ml"].loc[i_bin, lab]) for lab in D_labs] + [trans.S_inv(hkldata.binned_df["ml"].loc[i_bin, "S"])]
                res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
                                              bounds=((-5 if D_trans else 1e-5, None), )*len(D_labs) + ((-3 if S_trans else 5e-2, None),))
                nfev_total += res.nfev
                #print(i_bin)
                #print(res)
                for i, lab in enumerate(D_labs):
                    hkldata.binned_df["ml"].loc[i_bin, lab] = trans.D(res.x[i])
                hkldata.binned_df["ml"].loc[i_bin, "S"] = trans.S(res.x[-1])
                if twin_data:
                    twin_data.ml_scale[i_bin, :] = trans.D(res.x[:-1])
                    twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])

        # record Mn(|Dj*FCj|) statistics per bin
        if twin_data:
            dfc = numpy.abs(twin_data.f_calc) * twin_data.ml_scale_array()
            for i_bin, idxes in hkldata.binned("ml"):
                dfc_bin = dfc[numpy.asarray(twin_data.bin)==i_bin,:]
                mean_dfc = numpy.nanmean(dfc_bin, axis=0)
                for i, (dlab, fclab) in enumerate(zip(D_labs, fc_labs)):
                    hkldata.binned_df["ml"].loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc[i]
        else:
            for i_bin, idxes in hkldata.binned("ml"):
                for dlab, fclab in zip(D_labs, fc_labs):
                    mean_dfc = numpy.nanmean(numpy.abs(hkldata.binned_df["ml"][dlab][i_bin] * hkldata.df[fclab][idxes]))
                    hkldata.binned_df["ml"].loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc

        logger.writeln("Refined estimates:")
        logger.writeln(hkldata.binned_df["ml"].to_string())
        #numpy.testing.assert_allclose(hkldata.binned_df["ml"].S, twin_data.ml_sigma)
        #numpy.testing.assert_allclose(hkldata.binned_df["ml"][D_labs], twin_data.ml_scale)
        logger.writeln("time: {:.1f} sec ({} evaluations)".format(time.time() - t0, nfev_total))

        if not use_int or twin_data:
            break # did not implement MLF B_aniso optimization

        # Refine b_aniso
        adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=True)
        SMattolist = lambda B: [B.u11, B.u22, B.u33, B.u12, B.u13, B.u23]

        def target_ani(x):
            # total MLI over all bins for B_aniso parameterized by x (in adpdirs basis)
            b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
            k_ani = hkldata.debye_waller_factors(b_cart=b)
            ret = 0.
            for i_bin, idxes in hkldata.binned("ml"):
                Ds = [hkldata.binned_df["ml"].loc[i_bin, lab] for lab in D_labs]
                ret += mli(hkldata.df, fc_labs, Ds, hkldata.binned_df["ml"].loc[i_bin, "S"], k_ani, idxes)
            return ret
        def grad_ani(x):
            # gradient of target_ani in the adpdirs basis
            b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
            k_ani = hkldata.debye_waller_factors(b_cart=b)
            S2mat = hkldata.ssq_mat() # ssqmat
            g = numpy.zeros(6)
            for i_bin, idxes in hkldata.binned("ml"):
                r = integr.ll_int_der1_ani(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes],
                                           k_ani[idxes], hkldata.binned_df["ml"].loc[i_bin, "S"],
                                           hkldata.df[fc_labs].to_numpy()[idxes], hkldata.binned_df["ml"].loc[i_bin, D_labs],
                                           hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes],
                                           hkldata.df.llweight.to_numpy()[idxes])
                S2 = S2mat[:,idxes]
                g += -numpy.nansum(S2 * r[:,0], axis=1) # k_ani is already multiplied in r
            return numpy.dot(g, adpdirs.T)
        def shift_ani(x):
            # Newton-like shift using a Fisher-approximated Hessian
            b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
            k_ani = hkldata.debye_waller_factors(b_cart=b)
            S2mat = hkldata.ssq_mat() # ssqmat
            g = numpy.zeros(6)
            H = numpy.zeros((6, 6))
            for i_bin, idxes in hkldata.binned("ml"):
                r = integr.ll_int_der1_ani(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes],
                                           k_ani[idxes], hkldata.binned_df["ml"].loc[i_bin, "S"],
                                           hkldata.df[fc_labs].to_numpy()[idxes], list(hkldata.binned_df["ml"].loc[i_bin, D_labs]),
                                           hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes],
                                           hkldata.df.llweight.to_numpy()[idxes])
                S2 = S2mat[:,idxes]
                g += -numpy.nansum(S2 * r[:,0], axis=1) # k_ani is already multiplied in r
                H += numpy.nansum(numpy.matmul(S2[None,:].T, S2.T[:,None]) * (r[:,0]**2)[:,None,None], axis=0)

            g, H = numpy.dot(g, adpdirs.T), numpy.dot(adpdirs, numpy.dot(H, adpdirs.T))
            return -numpy.dot(g, numpy.linalg.pinv(H))

        logger.writeln("Refining B_aniso. Current = {}".format(b_aniso))
        if 0:  # disabled: scipy minimization of B_aniso
            x0 = numpy.dot(SMattolist(b_aniso), numpy.linalg.pinv(adpdirs))
            res = scipy.optimize.minimize(fun=target_ani, x0=x0, jac=grad_ani)
            print(res)
            b_aniso = gemmi.SMat33d(*numpy.dot(res.x, adpdirs))
            f1 = res.fun
        else:
            # damped Newton iterations; shift is halved up to 3 times per step
            B_converged = False
            for j in range(10):
                x = numpy.dot(SMattolist(b_aniso), numpy.linalg.pinv(adpdirs))
                f0 = target_ani(x)
                shift = shift_ani(x)
                for i in range(3):
                    ss = shift / 2**i
                    f1 = target_ani(x + ss)
                    #logger.writeln("{:2d} f0 = {:.3e} shift = {} df = {:.3e}".format(j, f0, ss, f1 - f0))
                    if f1 < f0:
                        b_aniso = gemmi.SMat33d(*numpy.dot(x+ss, adpdirs))
                        if numpy.max(numpy.abs(ss)) < 1e-4: B_converged = True
                        break
                else:
                    B_converged = True
                if B_converged: break

        logger.writeln("Refined B_aniso = {}".format(b_aniso))
        logger.writeln("cycle {} f= {}".format(i_cyc, f1))

    return b_aniso
# refine_ml_params()
|
|
1131
|
+
|
|
1132
|
+
def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso,
                        D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss",
                        twin_data=None):
    """Top-level sigma-A estimation: initialize (if needed), refine, smooth.

    For intensities or twin data the refinement is done by the C++ extension
    (twin_data.ll_refine_D_S / integr.ll_refine_D_S); otherwise it falls back
    to the Python refine_ml_params(). Returns the (possibly updated) b_aniso.
    """
    assert use in ("all", "work", "test")
    assert smoothing in (None, "gauss")
    logger.write(f"Estimating sigma-A parameters from {'intensities' if use_int else 'amplitudes'} using {use} reflections")
    logger.writeln(f"{' (twin)' if twin_data else ''}")
    centric_and_selections = hkldata.centric_and_selections["ml"]

    # only initialize when the D/S columns are not already present
    if not set(D_labs + ["S"]).issubset(hkldata.binned_df["ml"]):
        initialize_ml_params(hkldata, use_int, D_labs, b_aniso, use, twin_data=twin_data)
        for dlab, fclab in zip(D_labs, fc_labs):
            hkldata.binned_df["ml"]["Mn(|{}*{}|)".format(dlab, fclab)] = numpy.nan

    if twin_data or use_int:
        t0 = time.time()
        if use == "all":
            idxes = numpy.concatenate([sel[i] for i_bin, _ in hkldata.binned("ml")
                                       for sel in centric_and_selections[i_bin] for i in (1,2)])
        else:
            i = 1 if use == "work" else 2
            idxes = numpy.concatenate([sel[i] for i_bin, _ in hkldata.binned("ml")
                                       for sel in centric_and_selections[i_bin]])
        k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
        # (I, SIGI) with NaN outside the selected reflections
        i_sigi = numpy.empty((2, len(hkldata.df.index)))
        i_sigi[:] = numpy.nan
        if twin_data:
            kani2_inv = 1 / k_ani**2
            i_sigi[0, idxes] = (hkldata.df.I.to_numpy() * kani2_inv)[idxes]
            i_sigi[1, idxes] = (hkldata.df.SIGI.to_numpy() * kani2_inv)[idxes]
            #twin_data.debug_open("twin_debug.json")
            twin_data.ll_refine_D_S(i_sigi[0,:], i_sigi[1,:], 20)
            #twin_data.debug_close()
            # copy refined values back and record Mn(|Dj*FCj|)
            dfc = numpy.abs(twin_data.f_calc) * twin_data.ml_scale_array()
            for i_bin, idxes in hkldata.binned("ml"):
                hkldata.binned_df["ml"].loc[i_bin, D_labs] = twin_data.ml_scale[i_bin, :]
                hkldata.binned_df["ml"].loc[i_bin, "S"] = twin_data.ml_sigma[i_bin]
                dfc_bin = dfc[numpy.asarray(twin_data.bin)==i_bin,:]
                mean_dfc = numpy.nanmean(dfc_bin, axis=0)
                for i, (dlab, fclab) in enumerate(zip(D_labs, fc_labs)):
                    hkldata.binned_df["ml"].loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc[i]
        else:
            i_sigi[0, idxes] = hkldata.df.I.to_numpy()[idxes]
            i_sigi[1, idxes] = hkldata.df.SIGI.to_numpy()[idxes]
            # 20 = iteration limit of the extension-side refinement
            DS = integr.ll_refine_D_S(i_sigi[0,:], i_sigi[1,:], k_ani,
                                      hkldata.binned_df["ml"].loc[:, "S"].to_numpy(), hkldata.df[fc_labs].to_numpy(),
                                      hkldata.binned_df["ml"].loc[:, D_labs].to_numpy(), hkldata.df.centric.to_numpy()+1,
                                      hkldata.df.epsilon.to_numpy(), hkldata.df.llweight.to_numpy(),
                                      hkldata.df.bin_ml.to_numpy(), 20)
            for i_bin, idxes in hkldata.binned("ml"):
                hkldata.binned_df["ml"].loc[i_bin, D_labs] = DS[i_bin, :-1]
                hkldata.binned_df["ml"].loc[i_bin, "S"] = DS[i_bin, -1]
                for dlab, fclab in zip(D_labs, fc_labs):
                    mean_dfc = numpy.nanmean(numpy.abs(hkldata.binned_df["ml"][dlab][i_bin] * hkldata.df[fclab][idxes]))
                    hkldata.binned_df["ml"].loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc
        logger.writeln("Refined estimates:")
        logger.writeln(hkldata.binned_df["ml"].to_string())
        logger.writeln(f"time: {time.time()-t0:.1f} sec")
    else:
        b_aniso = refine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso,
                                   D_trans, S_trans, use, n_cycle, twin_data)
    smooth_params(hkldata, D_labs, smoothing)
    return b_aniso
# determine_ml_params()
|
|
1196
|
+
|
|
1197
|
+
def smooth_params(hkldata, D_labs, smoothing): # XXX twin_data
    """Expand per-bin (D, S) estimates to per-reflection columns in hkldata.df.

    smoothing=None (or a single bin): copy each bin value to its reflections.
    smoothing="gauss": Gaussian smoothing in 1/d^2 via ext.smooth_gauss.
    Raises RuntimeError for any other smoothing name.
    """
    if smoothing is None or len(hkldata.binned("ml")) < 2:
        for i, lab in enumerate(D_labs + ["S"]):
            hkldata.df[lab] = hkldata.binned_data_as_array("ml", lab)

    elif smoothing == "gauss":
        # bin centers in 1/d^2: 0.5*(1/d_min^2 + 1/d_max^2)
        bin_centers = (0.5 / hkldata.binned_df["ml"][["d_min", "d_max"]]**2).sum(axis=1).to_numpy()
        vals = ext.smooth_gauss(bin_centers,
                                hkldata.binned_df["ml"][D_labs + ["S"]].to_numpy(),
                                1./hkldata.df.d.to_numpy()**2,
                                100, # min(n_ref?)
                                (bin_centers[1] - bin_centers[0]))
        for i, lab in enumerate(D_labs + ["S"]):
            hkldata.df[lab] = vals[:, i]
        # Update smoothened average; this affects next refinement.
        # TODO: update Mn(|Dj*FCj|) as well.
        #for i_bin, idxes in hkldata.binned("ml"):
        #    for lab in D_labs + ["S"]:
        #        hkldata.binned_df["ml"].loc[i_bin, lab] = numpy.mean(hkldata.df[lab].to_numpy()[idxes])
    else:
        raise RuntimeError("unknown smoothing method: {}".format(smoothing))
# smooth_params()
|
|
1219
|
+
|
|
1220
|
+
def expected_F_from_int(Io, sigIo, k_ani, DFc, eps, c, S):
    """Expected amplitude <F> given intensity observations, via ext.integ_J_ratio.

    c: centric flag (0 = acentric, else centric); may be scalar or array.
    Returns (f, m_proxy) where f is the expected amplitude (on the k_ani-free
    scale) and m_proxy is a figure-of-merit proxy (ratio of integrals with
    equal exponents). NOTE(review): `0.5 if c == 0` assumes scalar c at this
    point; array c is only broadcast afterwards — confirm callers pass scalar.
    """
    k_num = numpy.repeat(0.5 if c == 0 else 0., Io.size) # 0.5 if acentric
    k_den = k_num - 0.5
    if numpy.isscalar(c): c = numpy.repeat(c, Io.size)
    # arguments of the intensity-likelihood integrals
    to = Io / sigIo - sigIo / (c+1) / k_ani**2 / S / eps
    tf = k_ani * numpy.abs(DFc) / numpy.sqrt(sigIo)
    sig1 = k_ani**2 * S * eps / sigIo
    f = ext.integ_J_ratio(k_num, k_den, True, to, tf, sig1, c+1, integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
    # back to amplitude scale without anisotropic correction
    f *= numpy.sqrt(sigIo) / k_ani
    m_proxy = ext.integ_J_ratio(k_num, k_num, True, to, tf, sig1, c+1, integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
    return f, m_proxy
# expected_F_from_int()
|
|
1232
|
+
|
|
1233
|
+
def calc_fc_dano(hkldata, D_labs, DFc):
    """Calculated anomalous difference |DFc + i D FC''| - |DFc - i D FC''|.

    Assumes only the first component (D_labs[0]) has anomalous scatterers.
    Centric reflections are included and should give zero.
    """
    d0 = hkldata.df[D_labs[0]].to_numpy()
    fdp = hkldata.df["FC''"].to_numpy()
    imag_part = 1j * d0 * fdp
    plus = numpy.abs(DFc + imag_part)
    minus = numpy.abs(DFc - imag_part)
    return plus - minus
# calc_fc_dano()
|
|
1239
|
+
|
|
1240
|
+
def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, use="all"):
    """Compute map coefficients (FWT, DELFWT, FOM, F_est, optionally FAN/DELFAN)
    from intensities and write them into hkldata.df in place.

    use: which reflections keep coefficients ("all", "work" or "test"); the
    hidden set is replaced by DFc (missing-reflection fill).
    """
    nmodels = len(fc_labs)
    # initialize output columns with NaN (complex NaN for map coefficients)
    hkldata.df["FWT"] = 0j * numpy.nan
    hkldata.df["DELFWT"] = 0j * numpy.nan
    hkldata.df["F_est"] = numpy.nan
    hkldata.df["FOM"] = numpy.nan # FOM proxy, |<F>| / <|F|>
    has_ano = "I(+)" in hkldata.df and "I(-)" in hkldata.df
    Io = hkldata.df.I.to_numpy()
    sigIo = hkldata.df.SIGI.to_numpy()
    k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
    eps = hkldata.df.epsilon.to_numpy()
    Ds = numpy.vstack([hkldata.df[lab].to_numpy() for lab in D_labs]).T
    Fcs = numpy.vstack([hkldata.df[lab].to_numpy() for lab in fc_labs]).T
    # sigma-A weighted total calculated structure factor
    DFc = (Ds * Fcs).sum(axis=1)
    hkldata.df["DFC"] = DFc
    if has_ano:
        hkldata.df["FAN"] = 0j * numpy.nan
        ano_data = hkldata.df[["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]].to_numpy()
        if "FC''" in hkldata.df:
            fc_dano = calc_fc_dano(hkldata, D_labs, DFc)
            hkldata.df["DELFAN"] = 0j * numpy.nan
    for i_bin, idxes in hkldata.binned("ml"):
        for c, work, test in hkldata.centric_and_selections["ml"][i_bin]:
            cidxes = numpy.concatenate([work, test])
            S = hkldata.df["S"].to_numpy()[cidxes]
            # expected amplitude and FOM proxy from the intensity likelihood
            f, m_proxy = expected_F_from_int(Io[cidxes], sigIo[cidxes], k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
            # phase taken from DFc
            exp_ip = numpy.exp(numpy.angle(DFc[cidxes])*1j)
            hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
            hkldata.df.loc[cidxes, "DELFWT"] = f * exp_ip - DFc[cidxes]
            hkldata.df.loc[cidxes, "FOM"] = m_proxy
            hkldata.df.loc[cidxes, "F_est"] = f
            if has_ano:
                # expected F+ and F- for the anomalous map coefficients
                f_p, _ = expected_F_from_int(ano_data[cidxes,0], ano_data[cidxes,1],
                                             k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
                f_m, _ = expected_F_from_int(ano_data[cidxes,2], ano_data[cidxes,3],
                                             k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
                hkldata.df.loc[cidxes, "FAN"] = (f_p - f_m) * exp_ip / 2j
                if has_ano and "FC''" in hkldata.df:
                    # for centrics Fo_dano should be NaN; no need to mask fc_dano
                    hkldata.df.loc[cidxes, "DELFAN"] = ((f_p - f_m) - fc_dano[cidxes]) * exp_ip / 2j
            # remove reflections that should be hidden
            if use != "all":
                # usually use == "work"
                tohide = test if use == "work" else work
                hkldata.df.loc[tohide, "FWT"] = 0j * numpy.nan
                hkldata.df.loc[tohide, "DELFWT"] = 0j * numpy.nan
            # fill missing/hidden FWT with DFc
            fill_sel = numpy.isnan(hkldata.df["FWT"][cidxes].to_numpy())
            hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]
# calculate_maps_int()
|
|
1289
|
+
|
|
1290
|
+
def calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, use="all"):
    """Calculate map coefficients (FWT/DELFWT) for twinned data.

    Returns a NEW HklData built from the twin-data ASU; the input hkldata
    itself is not modified.  `use` selects which reflections contribute
    ("all", "work" or "test"); the complementary set has Io masked below.
    """
    # undo anisotropic scaling on intensities (squared because I ~ |F|^2)
    k_ani2_inv = 1 / hkldata.debye_waller_factors(b_cart=b_aniso)**2
    Io = hkldata.df.I.to_numpy(copy=True) * k_ani2_inv
    sigIo = hkldata.df.SIGI.to_numpy(copy=True) * k_ani2_inv
    # Mask Io
    for i_bin, idxes in hkldata.binned("ml"):
        for c, work, test in hkldata.centric_and_selections["ml"][i_bin]:
            if use != "all":
                tohide = test if use == "work" else work
                Io[tohide] = numpy.nan

    twin_data.est_f_true(Io, sigIo)
    Ds = twin_data.ml_scale_array()
    # DFc = sum_i D_i * Fc_i over the model components
    DFc = (twin_data.f_calc * Ds).sum(axis=1)
    exp_ip = numpy.exp(numpy.angle(DFc)*1j)  # unit phase factor of DFc
    Ft = numpy.asarray(twin_data.f_true_max)
    m = twin_data.calc_fom()
    Fexp = twin_data.expected_F(Io, sigIo)
    fwt = 2 * Fexp * exp_ip - DFc
    delfwt = Fexp * exp_ip - DFc
    # where FWT could not be computed (missing obs), fall back to DFc
    sel = numpy.isnan(fwt)
    fwt[sel] = DFc[sel]

    hkldata2 = utils.hkl.HklData(hkldata.cell, hkldata.sg,
                                 utils.hkl.df_from_twin_data(twin_data, fc_labs))
    hkldata2.df["FWT"] = fwt
    hkldata2.df["DELFWT"] = delfwt
    hkldata2.df["FOM"] = m
    hkldata2.df["F_est"] = Ft
    hkldata2.df["F_exp"] = Fexp
    hkldata2.df["FC"] = twin_data.f_calc.sum(axis=1)
    hkldata2.df["DFC"] = DFc
    hkldata2.df[D_labs] = Ds
    hkldata2.df["S"] = twin_data.ml_sigma_array()
    return hkldata2
# calculate_maps_twin()
|
|
1326
|
+
|
|
1327
|
+
def merge_models(sts):
    """Concatenate chains of all models in *sts* into one single-model structure.

    Chain IDs are kept as-is (no renaming of duplicates); cell and other
    metadata are cloned from the first structure.
    """
    combined = gemmi.Model(1)
    for structure in sts:
        for model in structure:
            for chain in model:
                combined.add_chain(chain)
    merged = sts[0].clone()
    del merged[:]  # drop the cloned models, keep metadata
    merged.add_model(combined)
    return merged
# merge_models()
|
|
1338
|
+
|
|
1339
|
+
def decide_mtz_labels(mtz, find_free=True, require=None, prefer_intensity=False):
    """Automatically choose observation (and optionally free-flag) columns.

    mtz : gemmi.Mtz-like object
    find_free : also look for a free-flag column and append it
    require : restrict acceptable observation kinds (subset of column types
              "F"/"J"/"G"/"K"); by default any of them in preference order
    prefer_intensity : prefer intensity columns ("J"/"K") over amplitudes
    Returns the selected labels as a list; raises RuntimeError if no
    observation/sigma pair is found.
    """
    if prefer_intensity:
        obs_types = ("J", "F", "K", "G")
    else:
        obs_types = ("F", "J", "G", "K")
    if require:
        assert set(require).issubset(obs_types)
    else:
        require = obs_types
    dlabs = utils.hkl.mtz_find_data_columns(mtz)
    logger.writeln("Finding possible options from MTZ:")
    for typ in dlabs:
        for labs in dlabs[typ]:
            logger.writeln(" --labin '{}'".format(",".join(labs)))
    for typ in require:
        if dlabs[typ]:
            # copy, so that appending the free-flag label below does not
            # mutate the list stored inside dlabs (was aliased before)
            labin = list(dlabs[typ][0])
            break
    else:
        # was an f-string with no placeholders; same message, plain literal
        raise RuntimeError("Error: Observation or sigma not found in MTZ")
    if find_free:
        flabs = utils.hkl.mtz_find_free_columns(mtz)
        if flabs:
            labin += [flabs[0]]
    logger.writeln("MTZ columns automatically selected: {}".format(labin))
    return labin
# decide_mtz_labels()
|
|
1366
|
+
|
|
1367
|
+
def decide_spacegroup(sg_user, sg_st, sg_hkl):
    """Pick the space group to use from user override, model, and data.

    sg_user wins unconditionally when given.  Otherwise the data (mtz) space
    group is the default; if the model disagrees but shares the Laue class,
    the model's space group is used instead; a Laue-class mismatch is fatal.
    """
    assert sg_hkl is not None
    if sg_user is not None:
        logger.writeln(f"Space group overridden by user. Using {sg_user.xhm()}")
        return sg_user
    ret = sg_hkl
    if sg_hkl != sg_st:
        same_laue = bool(sg_st) and sg_st.laue_str() == sg_hkl.laue_str()
        if sg_st and not same_laue:
            raise RuntimeError("Crystal symmetry mismatch between model and data")
        logger.writeln("Warning: space group mismatch between model and mtz")
        if same_laue:
            logger.writeln(" using space group from model")
            ret = sg_st
        else:
            logger.writeln(" using space group from mtz")
        logger.writeln("")
    return ret
# decide_spacegroup
|
|
1388
|
+
|
|
1389
|
+
def process_input(hklin, labin, n_bins_ml, free, xyzins, d_max=None, d_min=None,
                  n_per_mlbin=None, use="all", max_mlbins=None, cif_index=0, keep_charges=False,
                  allow_unusual_occupancies=False, space_group=None,
                  hklin_free=None, labin_free=None, labin_llweight=None, n_bins_stat=None, max_statbins=20):
    """Read reflection data and model(s) and prepare them for map/ML calculation.

    Returns (hkldata, sts, fc_labs, free, use):
      hkldata : utils.hkl.HklData with "ml"/"stat" binning and selections set up
      sts     : list of gemmi.Structure (cell/space group synced to the data)
      fc_labs : per-model Fc column labels ("FC0", "FC1", ...)
      free    : free-flag value (possibly guessed from the data)
      use     : possibly-updated choice of reflections for ML estimation
    Raises RuntimeError on bad input (SystemExit for small-molecule read failure).
    """
    if labin: assert 1 < len(labin) < 6
    assert use in ("all", "work", "test")

    # xyzins may be file names or already-read gemmi.Structure objects
    if len(xyzins) > 0 and type(xyzins[0]) is gemmi.Structure:
        sts = xyzins
    else:
        sts = []

    if type(hklin) is gemmi.Mtz or utils.fileio.is_mmhkl_file(hklin):
        if type(hklin) is gemmi.Mtz:
            mtz = hklin
        else:
            mtz = utils.fileio.read_mmhkl(hklin, cif_index=cif_index)
        if not sts:
            sts = [utils.fileio.read_structure(f) for f in xyzins]
    else:
        # small-molecule route: one model file expected alongside the data
        assert len(xyzins) == 1
        assert not sts
        st, mtz = utils.fileio.read_small_molecule_files([hklin, xyzins[0]])
        if None in (st, mtz):
            raise SystemExit("Failed to read small molecule file(s)")
        sts = [st]

    for st in sts:
        utils.model.check_occupancies(st, raise_error=not allow_unusual_occupancies)

    sg_use = decide_spacegroup(sg_user=gemmi.SpaceGroup(space_group) if space_group else None,
                               sg_st=sts[0].find_spacegroup() if sts else None,
                               sg_hkl=mtz.spacegroup)
    if not labin:
        labin = decide_mtz_labels(mtz, find_free=hklin_free is None)
    col_types = {x.label:x.type for x in mtz.columns}
    if labin[0] not in col_types:
        raise RuntimeError("MTZ column not found: {}".format(labin[0]))
    # observation column type -> (description, new labels, required MTZ types)
    labs_and_types = {"F": ("amplitude", ["FP","SIGFP"], ["F", "Q"]),
                      "J": ("intensity", ["I","SIGI"], ["J", "Q"]),
                      "G": ("anomalous amplitude", ["F(+)","SIGF(+)", "F(-)", "SIGF(-)"], ["G", "L", "G", "L"]),
                      "K": ("anomalous intensity", ["I(+)","SIGI(+)", "I(-)", "SIGI(-)"], ["K", "M", "K", "M"])}
    if col_types[labin[0]] not in labs_and_types:
        raise RuntimeError("MTZ column {} is neither amplitude nor intensity".format(labin[0]))
    if col_types[labin[0]] == "J": # may be unmerged data
        if (d_min, d_max).count(None) != 2:
            d_array = mtz.make_d_array()
            sel = ((0 if d_min is None else d_min) < d_array) & (d_array < (numpy.inf if d_max is None else d_max))
        else:
            sel = ...  # Ellipsis selects everything when indexing the arrays
        ints = gemmi.Intensities()
        ints.set_data(mtz.cell, sg_use, mtz.make_miller_array()[sel],
                      mtz.array[sel,mtz.column_labels().index(labin[0])],
                      mtz.array[sel,mtz.column_labels().index(labin[1])])
        dtype = ints.prepare_for_merging(gemmi.DataType.Mean) # do we want Anomalous?
        ints_bak = ints.clone() # for stats
        ints.merge_in_place(dtype)
        if (ints.nobs_array > 1).any():
            # data really were unmerged: replace mtz with merged version
            mtz = ints.prepare_merged_mtz(with_nobs=False)
            labin = mtz.column_labels()[3:]
            col_types = {x.label:x.type for x in mtz.columns}
            mult = ints.nobs_array.mean()
            logger.writeln(f"Input data were merged (multiplicity: {mult:.2f}). Overriding labin={','.join(labin)}")
        else:
            ints_bak = None
    else:
        ints_bak = None

    name, newlabels, require_types = labs_and_types[col_types[labin[0]]]
    logger.writeln("Observation type: {}".format(name))
    if len(newlabels) < len(labin): newlabels.append("FREE")
    hkldata = utils.hkl.hkldata_from_mtz(mtz, labin, newlabels=newlabels, require_types=require_types)
    hkldata.sg = sg_use
    hkldata.mask_invalid_obs_values(newlabels)
    # merge anomalous pairs into mean columns; exact column handling is in
    # utils.hkl.merge_anomalous (NOTE(review): assumed to keep (+)/(-) too)
    if newlabels[0] == "F(+)":
        hkldata.merge_anomalous(newlabels[:4], ["FP", "SIGFP"])
        newlabels = ["FP", "SIGFP"] + newlabels[4:]
    elif newlabels[0] == "I(+)":
        hkldata.merge_anomalous(newlabels[:4], ["I", "SIGI"])
        newlabels = ["I", "SIGI"] + newlabels[4:]

    if hkldata.df.empty:
        raise RuntimeError("No data in hkl data")

    if sts:
        for st in sts:
            if st[0].count_atom_sites() == 0:
                raise RuntimeError("No atom in the model")
        if not hkldata.cell.approx(sts[0].cell, 1e-3):
            logger.writeln("Warning: unit cell mismatch between model and reflection data")
            logger.writeln(" using unit cell from mtz")

        for st in sts:
            st.cell = hkldata.cell # mtz cell is used in any case
            st.spacegroup_hm = sg_use.xhm()
            st.setup_cell_images()

        if not keep_charges:
            utils.model.remove_charge(sts)

    hkldata.switch_to_asu()
    hkldata.remove_systematic_absences()
    #hkldata.df = hkldata.df.astype({name: 'float64' for name in ["I","SIGI","FP","SIGFP"] if name in hkldata.df})
    d_min_max_data = hkldata.d_min_max(newlabels)
    if d_min is None and hkldata.d_min_max()[0] != d_min_max_data[0]:
        # trim to the resolution range actually covered by observations
        d_min = d_min_max_data[0]
        logger.writeln(f"Changing resolution to {d_min:.3f} A")
    if (d_min, d_max).count(None) != 2:
        hkldata = hkldata.copy(d_min=d_min, d_max=d_max)
        d_min_max_data = hkldata.d_min_max(newlabels)
        if hkldata.df.empty:
            raise RuntimeError("No data left in hkl data")

    if hklin_free is not None:
        # free flags (and optional LL weights) come from a separate file
        mtz2 = utils.fileio.read_mmhkl(hklin_free)
        for lab in (labin_free, labin_llweight):
            if lab and lab not in mtz2.column_labels():
                raise RuntimeError(f"specified label ({labin_free}) not found in {hklin_free}")
        if not labin_free:
            tmp = utils.hkl.mtz_find_free_columns(mtz2)
            if tmp:
                labin_free = tmp[0]
            elif not labin_llweight:
                raise RuntimeError(f"Test flag label not found in {hklin_free}")
        labs, newlabs = [], []
        for lab, newlab in ((labin_free, "FREE"), (labin_llweight, "llweight")):
            if lab:
                labs.append(lab)
                newlabs.append(newlab)
        tmp = utils.hkl.hkldata_from_mtz(mtz2, labs, newlabels=newlabs)
        tmp.sg = sg_use
        tmp.switch_to_asu()
        tmp.remove_systematic_absences()
        tmp = tmp.copy(d_min=d_min_max_data[0], d_max=d_min_max_data[1])
        hkldata.complete()
        tmp.complete()
        hkldata.merge(tmp.df[["H","K","L"] + newlabs])

    hkldata.complete()
    hkldata.sort_by_resolution()
    hkldata.calc_epsilon()
    hkldata.calc_centric()

    # fill dummy value for missing where observation is also missing
    for lab in ("FREE", "llweight"):
        if lab in hkldata.df:
            if (hkldata.df[lab].isna() & ~hkldata.df[newlabels[0]].isna()).any():
                raise RuntimeError(f"Missing {lab} reflection(s).")
            hkldata.df[lab] = hkldata.df[lab].fillna(0)

    if "llweight" not in hkldata.df:
        hkldata.df["llweight"] = 1.

    if "FREE" in hkldata.df and free is None:
        free = hkldata.guess_free_number(newlabels[0]) # also check NaN

    if n_bins_ml is None:
        n_bins_ml, use = utils.hkl.decide_ml_binning(hkldata, data_label=newlabels[0],
                                                     free_label="FREE", free=free,
                                                     use=use, n_per_bin=n_per_mlbin,
                                                     max_bins=max_mlbins)
        if n_bins_ml < 3 and use == "test":
            logger.writeln("Warning: Not enough reflections for ML parameters.")
            logger.writeln("Switching to use=work, i.e. use working reflections for ML estimation")
            use = "work"
            n_bins_ml, use = utils.hkl.decide_ml_binning(hkldata, data_label=newlabels[0],
                                                         free_label="FREE", free=free,
                                                         use=use, n_per_bin=n_per_mlbin,
                                                         max_bins=max_mlbins)
    if n_bins_stat is None:
        sel = hkldata.df[newlabels[0]].notna()
        if "FREE" in hkldata.df:
            sel &= hkldata.df["FREE"] == free
        s_array = 1/hkldata.d_spacings()[sel]
        n_bins_stat = utils.hkl.decide_n_bins(10, s_array, min_bins=2, max_bins=max_statbins)

    hkldata.setup_binning(n_bins=n_bins_ml, name="ml")
    hkldata.setup_binning(n_bins=n_bins_stat, name="stat")
    hkldata.setup_centric_and_selections("ml", data_lab=newlabels[0], free=free)
    hkldata.setup_centric_and_selections("stat", data_lab=newlabels[0], free=free)
    fc_labs = ["FC{}".format(i) for i, _ in enumerate(sts)]

    # Create a centric selection table for faster look up
    stats = hkldata.binned_df["stat"].copy()
    stats["n_all"] = 0
    stats["n_obs"] = 0
    stats[newlabels[0]] = numpy.nan
    snr = "I/sigma" if newlabels[0] == "I" else "F/sigma"
    stats[snr] = numpy.nan
    if newlabels[0] == "I":
        stats["Mn(I)/Std(I)"] = numpy.nan
    if "FREE" in hkldata.df:
        stats["n_work"] = 0
        stats["n_test"] = 0

    # per-resolution-bin counts and signal-to-noise statistics
    for i_bin, idxes in hkldata.binned("stat"):
        n_work, n_test = 0, 0
        for c, work, test in hkldata.centric_and_selections["stat"][i_bin]:
            n_work += numpy.sum(numpy.isfinite(hkldata.df.loc[work, newlabels[0]]))
            n_test += numpy.sum(numpy.isfinite(hkldata.df.loc[test, newlabels[0]]))
        n_obs = n_work + n_test
        stats.loc[i_bin, "n_obs"] = n_obs
        stats.loc[i_bin, "n_all"] = len(idxes)
        obs = hkldata.df[newlabels[0]].to_numpy()[idxes]
        sigma = hkldata.df[newlabels[1]].to_numpy()[idxes]
        if n_obs > 0:
            stats.loc[i_bin, snr] = numpy.nanmean(obs / sigma)
            mean_obs = numpy.nanmean(obs)
            stats.loc[i_bin, newlabels[0]] = mean_obs
            if newlabels[0] == "I":
                stats.loc[i_bin, "Mn(I)/Std(I)"] = mean_obs / numpy.nanstd(obs)
        if "FREE" in hkldata.df:
            stats.loc[i_bin, "n_work"] = n_work
            stats.loc[i_bin, "n_test"] = n_test

    stats["completeness"] = stats["n_obs"] / stats["n_all"] * 100
    logger.writeln("Data completeness: {:.2%}".format(stats["n_obs"].sum() / stats["n_all"].sum()))
    if ints_bak is not None: # TODO ensure the same binning (use hkldata's binning)
        binner = gemmi.Binner()
        for name, n_bins in (("stat", n_bins_stat), ("ml", n_bins_ml)):
            binner.setup(n_bins, gemmi.Binner.Method.Dstar2, ints_bak)
            bin_stats = ints_bak.calculate_merging_stats(binner, use_weights="X")
            cc12 = numpy.array([stats.cc_half() for stats in bin_stats])
            if name == "stat": stats["CC1/2"] = cc12
            hkldata.binned_df[name]["CC*"] = numpy.sqrt(2 * cc12 / (1 + cc12))

    logger.writeln(stats.to_string())
    return hkldata, sts, fc_labs, free, use
# process_input()
|
|
1618
|
+
|
|
1619
|
+
def update_fc(st_list, fc_labs, d_min, monlib, source, mott_bethe, hkldata=None, twin_data=None, addends=None, addends2=None):
    """Recalculate per-model structure factors and store them.

    Results go to twin_data.f_calc columns when twin_data is given,
    otherwise to hkldata.df[fc_labs] (plus total "FC" and, when addends2
    is given, the anomalous "FC''" column).
    """
    #assert (hkldata, twin_data).count(None) == 1
    # hkldata not updated when twin_data is given
    if addends2:
        hkldata.df["FC''"] = 0.
    for i, st in enumerate(st_list):
        if st.ncs:
            # expand NCS on a clone so the caller's structures stay untouched
            st = st.clone()
            st.expand_ncs(gemmi.HowToNameCopiedChain.Dup, merge_dist=0)
        if twin_data:
            hkl = twin_data.asu
        else:
            hkl = hkldata.miller_array()
        fc = utils.model.calc_fc_fft(st, d_min - 1e-6,
                                     monlib=monlib,
                                     source=source,
                                     mott_bethe=mott_bethe,
                                     miller_array=hkl,
                                     addends=addends)
        if addends2:
            fcpp = utils.model.calc_fcpp_fft(st, d_min - 1e-6, addends2, miller_array=hkl)
            hkldata.df["FC''"] += fcpp
        if twin_data:
            twin_data.f_calc[:,i] = fc
        else:
            hkldata.df[fc_labs[i]] = fc
    if not twin_data:
        hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
# update_fc()
|
|
1648
|
+
|
|
1649
|
+
def calc_Fmask(st, d_min, miller_array, use_non_binary_mask=False):
    """Return solvent-mask structure factors at the given Miller indices.

    Builds a solvent mask (Refmac atomic radii) on a grid fine enough for
    d_min, Fourier-transforms it, and samples the result at miller_array.
    """
    logger.writeln("Calculating solvent contribution..")
    mask_grid = gemmi.FloatGrid()
    mask_grid.setup_from(st, spacing=min(0.6, (d_min-1e-6) / 2 - 1e-9))
    masker = gemmi.SolventMasker(gemmi.AtomicRadiiSet.Refmac)
    if use_non_binary_mask:
        logger.writeln("Using non-binary solvent mask")
        masker.use_atom_occupancy = True
        masker.island_min_volume = 0
    masker.put_mask_on_float_grid(mask_grid, st[0])
    #utils.maps.write_ccp4_map("solmask.ccp4", grid)
    f_grid = gemmi.transform_map_to_f_phi(mask_grid)
    # TODO remove this with gemmi 0.7.5
    hkl = miller_array if miller_array.flags.writeable else miller_array.copy()
    return f_grid.get_value_by_hkl(hkl)
# calc_Fmask()
|
|
1667
|
+
|
|
1668
|
+
def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None, func_type="log_cosh", twin_data=None):
    """Determine bulk-solvent contribution and least-squares scales, and apply them.

    Side effects: divides observed columns by the overall scale, multiplies
    Fc columns (or twin_data.f_calc) by the isotropic B factor, stores
    "k_aniso" and updates total "FC"/solvent columns.  Returns the LsqScale
    object (carries k_overall, b_iso, b_aniso, k_sol, b_sol).
    """
    # fc_labs must have solvent part at the end
    miller_array = twin_data.asu if twin_data else hkldata.miller_array()
    d_min = twin_data.d_min(sts[0].cell) if twin_data else hkldata.d_min_max()[0]
    if use_solvent:
        if mask is None:
            Fmask = calc_Fmask(merge_models(sts), d_min, miller_array)
        else:
            # a user-provided mask map: just transform and sample it
            fmask_gr = gemmi.transform_map_to_f_phi(mask)
            # TODO remove this with gemmi 0.7.5
            if not miller_array.flags.writeable:
                miller_array = miller_array.copy()
            Fmask = fmask_gr.get_value_by_hkl(miller_array)
        # atomic part = all Fc components except the trailing solvent slot
        if twin_data:
            fc_sum = twin_data.f_calc[:,:-1].sum(axis=1)
        else:
            fc_sum = hkldata.df[fc_labs[:-1]].sum(axis=1).to_numpy()
        fc_list = [fc_sum, Fmask]
    else:
        if twin_data:
            fc_list = [twin_data.f_calc.sum(axis=1)]
        else:
            fc_list = [hkldata.df[fc_labs].sum(axis=1).to_numpy()]

    scaling = LsqScale(func_type=func_type)
    scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0, twin_data=twin_data)
    scaling.scale()
    b_iso = scaling.b_iso
    k_aniso = hkldata.debye_waller_factors(b_cart=scaling.b_aniso)
    hkldata.df["k_aniso"] = k_aniso # we need it later when calculating stats

    if use_solvent:
        if twin_data:
            s2 = numpy.asarray(twin_data.s2_array)
        else:
            s2 = 1. / hkldata.d_spacings().to_numpy()**2
        Fbulk = Fmask * scaling.get_solvent_scale(scaling.k_sol, scaling.b_sol, s2)
        if twin_data:
            twin_data.f_calc[:,-1] = Fbulk
        else:
            hkldata.df[fc_labs[-1]] = Fbulk

    # Apply scales
    if use_int:
        # in intensity case, we try to refine b_aniso with ML. perhaps we should do it in amplitude case also
        o_labs = ["I", "SIGI", "I(+)","SIGI(+)", "I(-)", "SIGI(-)"]
        hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall**2
    else:
        o_labs = ["FP", "SIGFP", "F(+)","SIGF(+)", "F(-)", "SIGF(-)"]
        hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall
    if twin_data:
        twin_data.f_calc[:] *= twin_data.debye_waller_factors(b_iso=b_iso)[:,None]
    else:
        k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
        for lab in fc_labs + ["FC''"]:
            if lab in hkldata.df:
                hkldata.df[lab] *= k_iso
        # total Fc
        hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
    return scaling
# bulk_solvent_and_lsq_scales()
|
|
1729
|
+
|
|
1730
|
+
def calculate_maps(hkldata, b_aniso, fc_labs, D_labs, log_out, use="all"):
    """Compute 2mFo-DFc / mFo-DFc map coefficients from amplitudes and write stats.

    Adds FWT/DELFWT/FOM/X/DFC (and FAN/DELFAN for anomalous data) columns to
    hkldata.df in place; writes a loggraph-formatted per-bin statistics file
    to log_out.  `use` hides the complementary reflection set from the maps.
    """
    nmodels = len(fc_labs)
    hkldata.df["FWT"] = 0j * numpy.nan
    hkldata.df["DELFWT"] = 0j * numpy.nan
    hkldata.df["FOM"] = numpy.nan
    hkldata.df["X"] = numpy.nan # for FOM
    has_ano = "F(+)" in hkldata.df and "F(-)" in hkldata.df
    stats_data = []
    k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
    Ds = numpy.vstack([hkldata.df[lab].to_numpy() for lab in D_labs]).T
    Fcs = numpy.vstack([hkldata.df[lab].to_numpy() for lab in fc_labs]).T
    DFc = (Ds * Fcs).sum(axis=1)  # sum_i D_i * Fc_i
    hkldata.df["DFC"] = DFc
    if has_ano:
        hkldata.df["FAN"] = 0j * numpy.nan
        if "FC''" in hkldata.df:
            fc_dano = calc_fc_dano(hkldata, D_labs, DFc)
            hkldata.df["DELFAN"] = 0j * numpy.nan
    for i_bin, idxes in hkldata.binned("ml"):
        bin_d_min = hkldata.binned_df["ml"].d_min[i_bin]
        bin_d_max = hkldata.binned_df["ml"].d_max[i_bin]
        # 0: acentric 1: centric
        mean_fom = [numpy.nan, numpy.nan]
        nrefs = [0, 0]
        for c, work, test in hkldata.centric_and_selections["ml"][i_bin]:
            cidxes = numpy.concatenate([work, test])
            S = hkldata.df["S"].to_numpy()[cidxes]
            expip = numpy.exp(numpy.angle(DFc[cidxes])*1j)  # phase of DFc
            Fo = hkldata.df.FP.to_numpy()[cidxes] / k_ani[cidxes]
            SigFo = hkldata.df.SIGFP.to_numpy()[cidxes] / k_ani[cidxes]
            epsilon = hkldata.df.epsilon.to_numpy()[cidxes]
            nrefs[c] = numpy.sum(numpy.isfinite(Fo))
            DFc_abs = numpy.abs(DFc[cidxes])
            if c == 0:
                # acentric: FOM m = I1(X)/I0(X)
                Sigma = 2 * SigFo**2 + epsilon * S
                X = 2 * Fo * DFc_abs / Sigma
                m = gemmi.bessel_i1_over_i0(X)
            else:
                # centric: FOM m = tanh(X)
                Sigma = SigFo**2 + epsilon * S
                X = Fo * DFc_abs / Sigma
                m = numpy.tanh(X)
            hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
            hkldata.df.loc[cidxes, "DELFWT"] = (m * Fo - DFc_abs) * expip
            hkldata.df.loc[cidxes, "FOM"] = m
            hkldata.df.loc[cidxes, "X"] = X
            #hkldata.df.loc[cidxes, "LL"] = ext.ll_amp(hkldata.df.FP.to_numpy()[cidxes],
            #                                          hkldata.df.SIGFP.to_numpy()[cidxes],
            #                                          k_ani[cidxes], S * epsilon,
            #                                          DFc_abs, numpy.full(cidxes.shape, c + 1),
            #                                          hkldata.df.llweight.to_numpy()[cidxes])
            if has_ano:
                Fo_dano = (hkldata.df["F(+)"].to_numpy()[cidxes] - hkldata.df["F(-)"].to_numpy()[cidxes]) / k_ani[cidxes]
                hkldata.df.loc[cidxes, "FAN"] = m * Fo_dano * expip / 2j
                if "FC''" in hkldata.df:
                    # for centrics Fo_dano should be NaN; no need to mask fc_dano
                    hkldata.df.loc[cidxes, "DELFAN"] = (m * Fo_dano - fc_dano[cidxes]) * expip / 2j
            if nrefs[c] > 0: mean_fom[c] = numpy.nanmean(m)

            # remove reflections that should be hidden
            if use != "all":
                # usually use == "work"
                tohide = test if use == "work" else work
                hkldata.df.loc[tohide, "FWT"] = 0j * numpy.nan
                hkldata.df.loc[tohide, "DELFWT"] = 0j * numpy.nan
            # missing/hidden FWT entries fall back to DFc
            fill_sel = numpy.isnan(hkldata.df["FWT"][cidxes].to_numpy())
            hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]

        # per-bin statistics (R, CC, mean FOMs, log-variances)
        Fc = hkldata.df.FC.to_numpy()[idxes] * k_ani[idxes]
        Fo = hkldata.df.FP.to_numpy()[idxes]
        mean_DFc2 = numpy.nanmean(numpy.abs((Ds[idxes,:] * Fcs[idxes,:]).sum(axis=1) * k_ani[idxes])**2)
        with numpy.errstate(divide="ignore"):
            mean_log_DFcs = numpy.log(numpy.nanmean(numpy.abs(Ds[idxes,:] * Fcs[idxes,:] * k_ani[idxes,None]), axis=0)).tolist()
        mean_Ds = numpy.nanmean(Ds[idxes,:], axis=0).tolist()
        if sum(nrefs) > 0:
            r = numpy.nansum(numpy.abs(numpy.abs(Fc)-Fo)) / numpy.nansum(Fo)
            cc = utils.hkl.correlation(Fo, numpy.abs(Fc))
            mean_Fo2 = numpy.nanmean(numpy.abs(Fo)**2)
        else:
            r, cc, mean_Fo2 = numpy.nan, numpy.nan, numpy.nan
        stats_data.append([i_bin, nrefs[0], nrefs[1], bin_d_max, bin_d_min,
                           numpy.log(mean_Fo2),
                           numpy.log(numpy.nanmean(numpy.abs(Fc)**2)),
                           numpy.log(mean_DFc2),
                           numpy.log(numpy.mean(hkldata.df["S"].to_numpy()[idxes])),
                           mean_fom[0], mean_fom[1], r, cc] + mean_Ds + mean_log_DFcs)

    DFc_labs = ["log(Mn(|{}{}|))".format(dl,fl) for dl,fl in zip(D_labs, fc_labs)]
    cols = ["bin", "n_a", "n_c", "d_max", "d_min",
            "log(Mn(|Fo|^2))", "log(Mn(|Fc|^2))", "log(Mn(|DFc|^2))",
            "log(Sigma)", "FOM_a", "FOM_c", "R", "CC(|Fo|,|Fc|)"] + D_labs + DFc_labs
    stats = pandas.DataFrame(stats_data, columns=cols)
    title_labs = [["log(Mn(|F|^2)) and variances", ["log(Mn(|Fo|^2))", "log(Mn(|Fc|^2))", "log(Mn(|DFc|^2))", "log(Sigma)"]],
                  ["FOM", ["FOM_a", "FOM_c"]],
                  ["D", D_labs],
                  ["DFc", DFc_labs],
                  ["R-factor", ["R"]],
                  ["CC", ["CC(|Fo|,|Fc|)"]],
                  ["number of reflections", ["n_a", "n_c"]]]
    with open(log_out, "w") as ofs:
        ofs.write(utils.make_loggraph_str(stats, main_title="Statistics",
                                          title_labs=title_labs,
                                          s2=1/stats["d_min"]**2))
    logger.writeln("output log: {}".format(log_out))
# calculate_maps()
|
|
1834
|
+
|
|
1835
|
+
def main(args):
    """Command-line driver: read data/model, scale, estimate ML parameters,
    compute map coefficients and write the output MTZ.

    Returns the final HklData (a fresh one in the twin case).
    """
    if args.wavelength is not None and args.source != "xray":
        raise SystemExit("Error: Wavelength is only available for X-ray source")
    try:
        hkldata, sts, fc_labs, free, args.use = process_input(
            hklin=args.hklin,
            labin=args.labin.split(",") if args.labin else None,
            n_bins_ml=args.nbins_ml,
            n_bins_stat=args.nbins,
            free=args.free,
            xyzins=sum(args.model, []),
            d_max=args.d_max,
            d_min=args.d_min,
            use=args.use,
            max_mlbins=30,
            keep_charges=args.keep_charges,
            space_group=args.spacegroup,
            hklin_free=args.hklin_free,
            labin_free=args.labin_free)
    except RuntimeError as e:
        raise SystemExit("Error: {}".format(e))

    addends, addends2 = utils.model.check_atomsf(sts, args.source, mott_bethe=(args.source=="electron"), wavelength=args.wavelength)
    for st in sts:
        utils.model.find_special_positions(st, fix_occ=True, fix_pos=False, fix_adp=False)

    if args.twin:
        twin_data, _ = find_twin_domains_from_data(hkldata)
    else:
        twin_data = None
    if twin_data:
        # one Fc column per model plus one for bulk solvent (unless disabled)
        twin_data.setup_f_calc(len(sts) + (0 if args.no_solvent else 1))

    subtract_common_aniso_from_model(sts)
    update_fc(sts, fc_labs, d_min=hkldata.d_min_max()[0], monlib=None,
              source=args.source, mott_bethe=(args.source=="electron"),
              hkldata=hkldata, twin_data=twin_data, addends=addends, addends2=addends2)
    is_int = "I" in hkldata.df  # intensity vs amplitude workflow

    if args.mask:
        mask = utils.fileio.read_ccp4_map(args.mask)[0]
    else:
        mask = None

    # Overall scaling & bulk solvent
    # FP/SIGFP will be scaled. Total FC will be added.
    if not args.no_solvent:
        fc_labs.append("Fbulk")
    lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
                                      use_int=is_int, mask=mask, twin_data=twin_data)
    b_aniso = lsq.b_aniso
    # stats
    stats, overall = calc_r_and_cc(hkldata, twin_data)
    if is_int:
        logger.writeln("R1 is calculated for reflections with I/sigma>2.")

    if twin_data:
        estimate_twin_fractions_from_model(twin_data, hkldata)
        #del hkldata.df["FC"]
        #del hkldata.df["Fbulk"]
        # Need to redo scaling?
        lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
                                          use_int=is_int, mask=mask, twin_data=twin_data)
        b_aniso = lsq.b_aniso
        stats, overall = calc_r_and_cc(hkldata, twin_data)
    for lab in "R", "CC":
        logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
    logger.writeln(stats.to_string() + "\n")

    # Estimate ML parameters
    D_labs = ["D{}".format(i) for i in range(len(fc_labs))]

    if args.use_cc:
        assert not is_int
        assert not args.twin
        logger.writeln("Estimating sigma-A parameters from CC..")
        determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, args.use)
    else:
        b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, args.D_trans, args.S_trans, args.use,
                                      twin_data=twin_data)
        if twin_data and args.twin_mlalpha:
            mlopt_twin_fractions(hkldata, twin_data, b_aniso)

    # map calculation never uses "test" exclusively
    use = {"all": "all", "work": "work", "test": "work"}[args.use]
    if twin_data:
        # replace hkldata
        hkldata = calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, use)
    elif is_int:
        calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, use)
    else:
        log_out = "{}.log".format(args.output_prefix)
        calculate_maps(hkldata, b_aniso, fc_labs, D_labs, log_out, use)

    # Write mtz file
    if twin_data:
        labs = ["F_est", "F_exp"]
    elif is_int:
        labs = ["I", "SIGI", "F_est"]
    else:
        labs = ["FP", "SIGFP"]
    labs.extend(["FOM", "FWT", "DELFWT", "FC", "DFC"])
    if "FAN" in hkldata.df:
        labs.append("FAN")
    if "DELFAN" in hkldata.df:
        labs.append("DELFAN")
    if not args.no_solvent:
        labs.append("Fbulk")
    if "FREE" in hkldata.df:
        labs.append("FREE")
    if "F_true_est" in hkldata.df:
        labs.append("F_true_est")
    labs += D_labs + ["S"]
    mtz_out = args.output_prefix+".mtz"
    hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q", "F_est": "F", "F_exp": "F"})
    return hkldata
# main()
|
|
1951
|
+
if __name__ == "__main__":
    import sys
    # Script entry point: parse command-line arguments and run the pipeline.
    args = parse_args(sys.argv[1:])
    main(args)
|