servalcat 0.4.72__cp312-cp312-macosx_11_0_arm64.whl → 0.4.88__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +2 -2
- servalcat/ext.cpython-312-darwin.so +0 -0
- servalcat/refine/refine.py +28 -26
- servalcat/refine/refine_geom.py +8 -2
- servalcat/refine/refine_spa.py +21 -12
- servalcat/refine/refine_xtal.py +27 -8
- servalcat/refine/spa.py +3 -0
- servalcat/refine/xtal.py +142 -96
- servalcat/refmac/exte.py +7 -5
- servalcat/refmac/refmac_keywords.py +11 -9
- servalcat/refmac/refmac_wrapper.py +89 -54
- servalcat/spa/fofc.py +11 -0
- servalcat/spa/fsc.py +3 -1
- servalcat/spa/run_refmac.py +11 -1
- servalcat/utils/fileio.py +5 -2
- servalcat/utils/hkl.py +20 -8
- servalcat/utils/model.py +13 -0
- servalcat/utils/refmac.py +19 -0
- servalcat/utils/restraints.py +19 -9
- servalcat/xtal/french_wilson.py +34 -28
- servalcat/xtal/sigmaa.py +338 -130
- servalcat/xtal/twin.py +115 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.88.dist-info}/METADATA +3 -3
- servalcat-0.4.88.dist-info/RECORD +45 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.88.dist-info}/WHEEL +1 -1
- servalcat-0.4.72.dist-info/RECORD +0 -44
- {servalcat-0.4.72.dist-info → servalcat-0.4.88.dist-info}/entry_points.txt +0 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.88.dist-info}/licenses/LICENSE +0 -0
servalcat/xtal/sigmaa.py
CHANGED
|
@@ -17,6 +17,7 @@ import scipy.optimize
|
|
|
17
17
|
from servalcat.utils import logger
|
|
18
18
|
from servalcat import utils
|
|
19
19
|
from servalcat import ext
|
|
20
|
+
from servalcat.xtal.twin import find_twin_domains_from_data, estimate_twin_fractions_from_model
|
|
20
21
|
|
|
21
22
|
"""
|
|
22
23
|
DFc = sum_j D_j F_c,j
|
|
@@ -51,6 +52,7 @@ def add_arguments(parser):
|
|
|
51
52
|
help="Use CC(|F1|,|F2|) to CC(F1,F2) conversion to derive D and S")
|
|
52
53
|
parser.add_argument('--use', choices=["all", "work", "test"], default="all",
|
|
53
54
|
help="Which reflections to be used for the parameter estimate.")
|
|
55
|
+
parser.add_argument('--twin', action="store_true", help="Turn on twin refinement")
|
|
54
56
|
parser.add_argument('--mask',
|
|
55
57
|
help="A solvent mask (by default calculated from the coordinates)")
|
|
56
58
|
parser.add_argument('--keep_charges', action='store_true',
|
|
@@ -71,22 +73,28 @@ def nanaverage(cc, w):
|
|
|
71
73
|
return numpy.nan
|
|
72
74
|
return numpy.average(cc[sel], weights=w[sel])
|
|
73
75
|
|
|
74
|
-
def calc_r_and_cc(hkldata, centric_and_selections):
|
|
76
|
+
def calc_r_and_cc(hkldata, centric_and_selections, twin_data=None):
|
|
75
77
|
has_int = "I" in hkldata.df
|
|
76
78
|
has_free = "FREE" in hkldata.df
|
|
77
79
|
stats = hkldata.binned_df.copy()
|
|
78
80
|
stats["n_obs"] = 0
|
|
79
81
|
if has_free:
|
|
80
82
|
stats[["n_work", "n_free"]] = 0
|
|
81
|
-
rlab = "
|
|
83
|
+
rlab = "R1" if has_int else "R"
|
|
82
84
|
cclab = "CCI" if has_int else "CCF"
|
|
83
|
-
|
|
85
|
+
if twin_data:
|
|
86
|
+
Fc = numpy.sqrt(twin_data.i_calc_twin())
|
|
87
|
+
else:
|
|
88
|
+
Fc = numpy.abs(hkldata.df.FC * hkldata.df.k_aniso)
|
|
84
89
|
if has_int:
|
|
85
90
|
obs = hkldata.df.I
|
|
91
|
+
obs_sqrt = numpy.sqrt(numpy.maximum(0, hkldata.df.I))
|
|
92
|
+
obs_sqrt[hkldata.df.I/hkldata.df.SIGI < 2] = numpy.nan # SHELX equivalent
|
|
86
93
|
calc = Fc**2
|
|
94
|
+
calc_sqrt = Fc
|
|
87
95
|
else:
|
|
88
|
-
obs = hkldata.df.FP
|
|
89
|
-
calc = Fc
|
|
96
|
+
obs = obs_sqrt = hkldata.df.FP
|
|
97
|
+
calc = calc_sqrt = Fc
|
|
90
98
|
if has_free:
|
|
91
99
|
for lab in (cclab, rlab):
|
|
92
100
|
for suf in ("work", "free"):
|
|
@@ -102,10 +110,10 @@ def calc_r_and_cc(hkldata, centric_and_selections):
|
|
|
102
110
|
idxes2 = numpy.concatenate([sel[j] for sel in centric_and_selections[i_bin]])
|
|
103
111
|
stats.loc[i_bin, "n_"+suf] = numpy.sum(numpy.isfinite(obs[idxes2]))
|
|
104
112
|
stats.loc[i_bin, cclab+suf] = utils.hkl.correlation(obs[idxes2], calc[idxes2])
|
|
105
|
-
stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(
|
|
113
|
+
stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes2], calc_sqrt[idxes2])
|
|
106
114
|
else:
|
|
107
115
|
stats.loc[i_bin, cclab] = utils.hkl.correlation(obs[idxes], calc[idxes])
|
|
108
|
-
stats.loc[i_bin, rlab] = utils.hkl.r_factor(
|
|
116
|
+
stats.loc[i_bin, rlab] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
|
|
109
117
|
|
|
110
118
|
# Overall
|
|
111
119
|
ret = {}
|
|
@@ -114,7 +122,7 @@ def calc_r_and_cc(hkldata, centric_and_selections):
|
|
|
114
122
|
ret[cclab+suf+"avg"] = nanaverage(stats[cclab+suf], stats["n_"+suf])
|
|
115
123
|
for j, suf in ((1, "work"), (2, "free")):
|
|
116
124
|
idxes = numpy.concatenate([sel[j] for i_bin, _ in hkldata.binned() for sel in centric_and_selections[i_bin]])
|
|
117
|
-
ret[rlab+suf] = utils.hkl.r_factor(
|
|
125
|
+
ret[rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
|
|
118
126
|
else:
|
|
119
127
|
ret[cclab+"avg"] = nanaverage(stats[cclab], stats["n_obs"])
|
|
120
128
|
ret[rlab] = utils.hkl.r_factor(obs, calc)
|
|
@@ -158,46 +166,63 @@ class LsqScale:
|
|
|
158
166
|
self.b_aniso = None
|
|
159
167
|
self.stats = {}
|
|
160
168
|
|
|
161
|
-
def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None):
|
|
169
|
+
def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None, twin_data=None):
|
|
162
170
|
assert 0 < len(fc_list) < 3
|
|
163
171
|
self.use_int = use_int
|
|
164
172
|
if sigma_cutoff is not None:
|
|
165
173
|
if use_int:
|
|
166
|
-
sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
|
|
174
|
+
self.sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
|
|
167
175
|
self.labcut = "(I/SIGI>{})".format(sigma_cutoff)
|
|
168
176
|
else:
|
|
169
|
-
sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
|
|
177
|
+
self.sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
|
|
170
178
|
self.labcut = "(F/SIGF>{})".format(sigma_cutoff)
|
|
171
179
|
else:
|
|
172
|
-
sel = hkldata.df.index
|
|
180
|
+
self.sel = hkldata.df.index
|
|
173
181
|
self.labcut = ""
|
|
174
|
-
self.obs = hkldata.df["I" if use_int else "FP"].to_numpy()
|
|
175
|
-
self.
|
|
176
|
-
self.
|
|
177
|
-
self.
|
|
182
|
+
self.obs = hkldata.df["I" if use_int else "FP"].to_numpy(copy=True)
|
|
183
|
+
self.obs[~self.sel] = numpy.nan
|
|
184
|
+
self.calc = [x for x in fc_list]
|
|
185
|
+
self.s2mat = hkldata.ssq_mat()
|
|
186
|
+
self.s2 = 1. / hkldata.d_spacings().to_numpy()**2
|
|
178
187
|
self.adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=False)
|
|
188
|
+
self.twin_data = twin_data
|
|
179
189
|
if use_int:
|
|
180
190
|
self.sqrt_obs = numpy.sqrt(self.obs)
|
|
181
191
|
|
|
182
192
|
def get_solvent_scale(self, k_sol, b_sol, s2=None):
|
|
183
193
|
if s2 is None: s2 = self.s2
|
|
184
194
|
return k_sol * numpy.exp(-b_sol * s2 / 4)
|
|
185
|
-
|
|
186
|
-
def
|
|
195
|
+
|
|
196
|
+
def fc_and_mask_grad(self, x):
|
|
187
197
|
fc0 = self.calc[0]
|
|
188
198
|
if len(self.calc) == 2:
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
199
|
+
if self.twin_data:
|
|
200
|
+
r = self.twin_data.scaling_fc_and_mask_grad(self.calc[1], x[-2], x[-1])
|
|
201
|
+
return r[:,0], r[:,1], r[:,2]
|
|
202
|
+
else:
|
|
203
|
+
fmask = self.calc[1]
|
|
204
|
+
temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
|
|
205
|
+
fbulk = x[-2] * temp_sol * fmask
|
|
206
|
+
fc = fc0 + fbulk
|
|
207
|
+
re_fmask_fcconj = (fmask * fc.conj()).real
|
|
208
|
+
fc_abs = numpy.abs(fc)
|
|
209
|
+
tmp = temp_sol / fc_abs * re_fmask_fcconj
|
|
210
|
+
return fc_abs, tmp, -tmp * x[-2] * self.s2 / 4
|
|
192
211
|
else:
|
|
193
|
-
|
|
212
|
+
if self.twin_data:
|
|
213
|
+
return numpy.sqrt(self.twin_data.i_calc_twin()), None, None
|
|
214
|
+
else:
|
|
215
|
+
return numpy.abs(fc0), None, None
|
|
216
|
+
|
|
217
|
+
def scaled_fc(self, x):
|
|
218
|
+
fc = self.fc_and_mask_grad(x)[0]
|
|
194
219
|
nadp = self.adpdirs.shape[0]
|
|
195
220
|
B = numpy.dot(x[1:nadp+1], self.adpdirs)
|
|
196
221
|
kani = numpy.exp(numpy.dot(-B, self.s2mat))
|
|
197
222
|
return self.k_trans(x[0]) * kani * fc
|
|
198
223
|
|
|
199
224
|
def target(self, x):
|
|
200
|
-
y =
|
|
225
|
+
y = self.scaled_fc(x)
|
|
201
226
|
if self.use_int:
|
|
202
227
|
diff = self.sqrt_obs - y
|
|
203
228
|
#y2 = y**2
|
|
@@ -214,18 +239,10 @@ class LsqScale:
|
|
|
214
239
|
|
|
215
240
|
def grad(self, x):
|
|
216
241
|
g = numpy.zeros_like(x)
|
|
217
|
-
|
|
218
|
-
if len(self.calc) == 2:
|
|
219
|
-
fmask = self.calc[1]
|
|
220
|
-
temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
|
|
221
|
-
fbulk = x[-2] * temp_sol * fmask
|
|
222
|
-
fc = fc0 + fbulk
|
|
223
|
-
else:
|
|
224
|
-
fc = fc0
|
|
242
|
+
fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
|
|
225
243
|
nadp = self.adpdirs.shape[0]
|
|
226
244
|
B = numpy.dot(x[1:nadp+1], self.adpdirs)
|
|
227
245
|
kani = numpy.exp(numpy.dot(-B, self.s2mat))
|
|
228
|
-
fc_abs = numpy.abs(fc)
|
|
229
246
|
k = self.k_trans(x[0])
|
|
230
247
|
y = k * kani * fc_abs
|
|
231
248
|
if self.use_int:
|
|
@@ -247,30 +264,19 @@ class LsqScale:
|
|
|
247
264
|
g[0] = numpy.nansum(kani * fc_abs * dfdy * self.k_trans_der(x[0]))
|
|
248
265
|
g[1:nadp+1] = numpy.dot(dfdb, self.adpdirs.T)
|
|
249
266
|
if len(self.calc) == 2:
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
g[-2] = numpy.nansum(tmp * dfdy)
|
|
253
|
-
g[-1] = numpy.nansum(-tmp * dfdy * x[-2] * self.s2 / 4)
|
|
267
|
+
g[-2] = numpy.nansum(k * kani * der_ksol * dfdy)
|
|
268
|
+
g[-1] = numpy.nansum(k * kani * der_bsol * dfdy)
|
|
254
269
|
|
|
255
270
|
return g
|
|
256
271
|
|
|
257
272
|
def calc_shift(self, x):
|
|
258
273
|
# TODO: sort out code duplication, if we use this.
|
|
259
|
-
g = numpy.zeros((len(self.
|
|
274
|
+
g = numpy.zeros((len(self.obs), len(x)))
|
|
260
275
|
H = numpy.zeros((len(x), len(x)))
|
|
261
|
-
|
|
262
|
-
fc0 = self.calc[0]
|
|
263
|
-
if len(self.calc) == 2:
|
|
264
|
-
fmask = self.calc[1]
|
|
265
|
-
temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
|
|
266
|
-
fbulk = x[-2] * temp_sol * fmask
|
|
267
|
-
fc = fc0 + fbulk
|
|
268
|
-
else:
|
|
269
|
-
fc = fc0
|
|
276
|
+
fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
|
|
270
277
|
nadp = self.adpdirs.shape[0]
|
|
271
278
|
B = numpy.dot(x[1:nadp+1], self.adpdirs)
|
|
272
279
|
kani = numpy.exp(numpy.dot(-B, self.s2mat))
|
|
273
|
-
fc_abs = numpy.abs(fc)
|
|
274
280
|
k = self.k_trans(x[0])
|
|
275
281
|
y = k * kani * fc_abs
|
|
276
282
|
if self.use_int:
|
|
@@ -297,27 +303,20 @@ class LsqScale:
|
|
|
297
303
|
g[:,0] = kani * fc_abs * self.k_trans_der(x[0])
|
|
298
304
|
g[:,1:nadp+1] = numpy.dot(dfdb.T, self.adpdirs.T)
|
|
299
305
|
if len(self.calc) == 2:
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
g[:,-2] = tmp
|
|
303
|
-
g[:,-1] = -tmp * x[-2] * self.s2 / 4
|
|
306
|
+
g[:,-2] = k * kani * der_ksol
|
|
307
|
+
g[:,-1] = k * kani * der_bsol
|
|
304
308
|
|
|
305
|
-
#
|
|
309
|
+
# no numpy.nandot..
|
|
310
|
+
g, dfdy, dfdy2 = g[self.sel, :], dfdy[self.sel], dfdy2[self.sel]
|
|
306
311
|
H = numpy.dot(g.T, g * dfdy2[:,None])
|
|
307
312
|
g = numpy.sum(dfdy[:,None] * g, axis=0)
|
|
308
313
|
dx = -numpy.dot(g, numpy.linalg.pinv(H))
|
|
309
314
|
return dx
|
|
310
315
|
|
|
311
316
|
def initial_kb(self):
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
fbulk = self.get_solvent_scale(self.k_sol, self.b_sol) * fmask
|
|
316
|
-
fc = fc0 + fbulk
|
|
317
|
-
else:
|
|
318
|
-
fc = fc0
|
|
319
|
-
sel = self.obs > 0
|
|
320
|
-
f1p, f2p, s2p = self.obs[sel], numpy.abs(fc)[sel], self.s2[sel]
|
|
317
|
+
fc_abs = self.fc_and_mask_grad([self.k_sol, self.b_sol])[0]
|
|
318
|
+
sel = self.obs > 0 # exclude nan as well
|
|
319
|
+
f1p, f2p, s2p = self.obs[sel], fc_abs[sel], self.s2[sel]
|
|
321
320
|
if self.use_int: f2p *= f2p
|
|
322
321
|
tmp = numpy.log(f2p) - numpy.log(f1p)
|
|
323
322
|
# g = [dT/dk, dT/db]
|
|
@@ -418,7 +417,7 @@ class LsqScale:
|
|
|
418
417
|
self.k_sol = res_x[-2]
|
|
419
418
|
self.b_sol = res_x[-1]
|
|
420
419
|
logger.writeln(" k_sol= {:.2e} B_sol= {:.2e}".format(self.k_sol, self.b_sol))
|
|
421
|
-
calc =
|
|
420
|
+
calc = self.scaled_fc(res_x)
|
|
422
421
|
if self.use_int: calc *= calc
|
|
423
422
|
self.stats["cc"] = utils.hkl.correlation(self.obs, calc)
|
|
424
423
|
self.stats["r"] = utils.hkl.r_factor(self.obs, calc)
|
|
@@ -510,6 +509,43 @@ def mli_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
|
|
|
510
509
|
return -g / H
|
|
511
510
|
# mli_shift_S()
|
|
512
511
|
|
|
512
|
+
def mltwin_est_ftrue(twin_data, df, k_ani, idxes):
|
|
513
|
+
kani2_inv = 1 / k_ani**2
|
|
514
|
+
i_sigi = numpy.empty((2, len(df.index)))
|
|
515
|
+
i_sigi[:] = numpy.nan
|
|
516
|
+
i_sigi[0, idxes] = (df.I.to_numpy() * kani2_inv)[idxes]
|
|
517
|
+
i_sigi[1, idxes] = (df.SIGI.to_numpy() * kani2_inv)[idxes]
|
|
518
|
+
twin_data.est_f_true(i_sigi[0,:], i_sigi[1,:])
|
|
519
|
+
# mltwin_est_ftrue()
|
|
520
|
+
|
|
521
|
+
def mltwin(df, twin_data, Ds, S, k_ani, idxes, i_bin):
|
|
522
|
+
twin_data.ml_sigma[i_bin] = S
|
|
523
|
+
twin_data.ml_scale[i_bin, :] = Ds
|
|
524
|
+
mltwin_est_ftrue(twin_data, df, k_ani, idxes)
|
|
525
|
+
return twin_data.ll()
|
|
526
|
+
# mltwin()
|
|
527
|
+
|
|
528
|
+
def deriv_mltwin_wrt_D_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
|
|
529
|
+
twin_data.ml_sigma[i_bin] = S
|
|
530
|
+
twin_data.ml_scale[i_bin, :] = Ds
|
|
531
|
+
mltwin_est_ftrue(twin_data, df, k_ani, idxes)
|
|
532
|
+
r = twin_data.ll_der_D_S()
|
|
533
|
+
g = numpy.zeros(r.shape[1])
|
|
534
|
+
g[:-1] = numpy.nansum(r[:,:-1], axis=0) # D
|
|
535
|
+
g[-1] = numpy.nansum(r[:,-1]) # S
|
|
536
|
+
return g
|
|
537
|
+
# deriv_mltwin_wrt_D_S()
|
|
538
|
+
|
|
539
|
+
def mltwin_shift_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
|
|
540
|
+
twin_data.ml_sigma[i_bin] = S
|
|
541
|
+
twin_data.ml_scale[i_bin, :] = Ds
|
|
542
|
+
mltwin_est_ftrue(twin_data, df, k_ani, idxes)
|
|
543
|
+
r = twin_data.ll_der_D_S()
|
|
544
|
+
g = numpy.nansum(r[:,-1])
|
|
545
|
+
H = numpy.nansum(r[:,-1]**2) # approximating expectation value of second derivative
|
|
546
|
+
return -g / H
|
|
547
|
+
# mltwin_shift_S()
|
|
548
|
+
|
|
513
549
|
def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, use="all", smoothing="gauss"):
|
|
514
550
|
# theoretical values
|
|
515
551
|
cc_a = lambda cc: (numpy.pi/4*(1-cc**2)**2 * scipy.special.hyp2f1(3/2, 3/2, 1, cc**2) - numpy.pi/4) / (1-numpy.pi/4)
|
|
@@ -594,7 +630,7 @@ def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selection
|
|
|
594
630
|
smooth_params(hkldata, D_labs, smoothing)
|
|
595
631
|
# determine_mlf_params_from_cc()
|
|
596
632
|
|
|
597
|
-
def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use):
|
|
633
|
+
def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use, twin_data=None):
|
|
598
634
|
# Initial values
|
|
599
635
|
for lab in D_labs: hkldata.binned_df[lab] = 1.
|
|
600
636
|
hkldata.binned_df["S"] = 10000.
|
|
@@ -614,8 +650,11 @@ def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selectio
|
|
|
614
650
|
Io = hkldata.df.I.to_numpy()[idxes]
|
|
615
651
|
else:
|
|
616
652
|
Io = hkldata.df.FP.to_numpy()[idxes]**2
|
|
617
|
-
Io /= k_ani[idxes]**2
|
|
618
|
-
|
|
653
|
+
Io /= k_ani[idxes]**2
|
|
654
|
+
if twin_data:
|
|
655
|
+
Ic = twin_data.i_calc_twin()[idxes]
|
|
656
|
+
else:
|
|
657
|
+
Ic = numpy.abs(hkldata.df.FC.to_numpy()[idxes])**2
|
|
619
658
|
mean_Io = numpy.mean(Io)
|
|
620
659
|
mean_Ic = numpy.mean(Ic)
|
|
621
660
|
cc = numpy.corrcoef(Io, Ic)[1,0]
|
|
@@ -635,16 +674,21 @@ def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selectio
|
|
|
635
674
|
min_D = hkldata.binned_df[D_lab][hkldata.binned_df[D_lab] > 0].min() * 0.1
|
|
636
675
|
logger.writeln("WARNING: negative {} is detected from initial estimates. Replacing it using minimum positive value {:.2e}".format(D_lab, min_D))
|
|
637
676
|
hkldata.binned_df[D_lab].where(hkldata.binned_df[D_lab] > 0, min_D, inplace=True) # arbitrary
|
|
638
|
-
|
|
677
|
+
|
|
678
|
+
if twin_data:
|
|
679
|
+
twin_data.ml_scale[:] = hkldata.binned_df.loc[:, D_labs]
|
|
680
|
+
twin_data.ml_sigma[:] = hkldata.binned_df.loc[:, "S"]
|
|
681
|
+
|
|
639
682
|
logger.writeln("Initial estimates:")
|
|
640
683
|
logger.writeln(hkldata.binned_df.to_string())
|
|
641
684
|
# initialize_ml_params()
|
|
642
685
|
|
|
643
686
|
def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_selections,
|
|
644
|
-
D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss"
|
|
687
|
+
D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss",
|
|
688
|
+
twin_data=None):
|
|
645
689
|
assert use in ("all", "work", "test")
|
|
646
690
|
assert smoothing in (None, "gauss")
|
|
647
|
-
logger.writeln("Estimating sigma-A parameters using {}..".format("intensities" if use_int else "amplitudes"))
|
|
691
|
+
logger.writeln("Estimating sigma-A parameters using {}..".format(("intensities" if use_int else "amplitudes") + " (twin)" if twin_data else ""))
|
|
648
692
|
trans = VarTrans(D_trans, S_trans)
|
|
649
693
|
lab_obs = "I" if use_int else "FP"
|
|
650
694
|
def get_idxes(i_bin):
|
|
@@ -655,7 +699,7 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
655
699
|
return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
|
|
656
700
|
|
|
657
701
|
if not set(D_labs + ["S"]).issubset(hkldata.binned_df):
|
|
658
|
-
initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use)
|
|
702
|
+
initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use, twin_data=twin_data)
|
|
659
703
|
for dlab, fclab in zip(D_labs, fc_labs):
|
|
660
704
|
hkldata.binned_df["Mn(|{}*{}|)".format(dlab, fclab)] = numpy.nan
|
|
661
705
|
|
|
@@ -681,8 +725,12 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
681
725
|
else:
|
|
682
726
|
Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
|
|
683
727
|
S = trans.S(x[-1])
|
|
684
|
-
|
|
685
|
-
|
|
728
|
+
|
|
729
|
+
if twin_data:
|
|
730
|
+
return mltwin(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
|
|
731
|
+
else:
|
|
732
|
+
f = mli if use_int else mlf
|
|
733
|
+
return f(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
|
|
686
734
|
|
|
687
735
|
def grad(x):
|
|
688
736
|
if refpar == "all":
|
|
@@ -697,8 +745,11 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
697
745
|
Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
|
|
698
746
|
S = trans.S(x[-1])
|
|
699
747
|
n_par = 1
|
|
700
|
-
|
|
701
|
-
|
|
748
|
+
if twin_data:
|
|
749
|
+
r = deriv_mltwin_wrt_D_S(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
|
|
750
|
+
else:
|
|
751
|
+
calc_deriv = deriv_mli_wrt_D_S if use_int else deriv_mlf_wrt_D_S
|
|
752
|
+
r = calc_deriv(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
|
|
702
753
|
g = numpy.zeros(n_par)
|
|
703
754
|
if refpar in ("all", "D"):
|
|
704
755
|
g[:len(fc_labs)] = r[:len(fc_labs)]
|
|
@@ -723,6 +774,18 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
723
774
|
for ids in range(10):
|
|
724
775
|
refpar = "D"
|
|
725
776
|
x0 = numpy.array([trans.D_inv(hkldata.binned_df.loc[i_bin, lab]) for lab in D_labs])
|
|
777
|
+
#print("MLTWIN=", target(x0))
|
|
778
|
+
#quit()
|
|
779
|
+
if 0:
|
|
780
|
+
h = 1e-3
|
|
781
|
+
f00 = target(x0)
|
|
782
|
+
g00 = grad(x0)
|
|
783
|
+
for ii in range(len(x0)):
|
|
784
|
+
xx = x0.copy()
|
|
785
|
+
xx[ii] += h
|
|
786
|
+
f01 = target(xx)
|
|
787
|
+
nder = (f01 - f00) / h
|
|
788
|
+
logger.writeln(f"DEBUG_der_D bin_{i_bin} {ii} ad={g00[ii]} nd={nder} r={g00[ii]/nder}")
|
|
726
789
|
vals_now = []
|
|
727
790
|
if 0:
|
|
728
791
|
f0 = target(x0)
|
|
@@ -758,15 +821,29 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
758
821
|
for i, lab in enumerate(D_labs):
|
|
759
822
|
hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
|
|
760
823
|
vals_now.append(hkldata.binned_df.loc[i_bin, lab])
|
|
824
|
+
if twin_data:
|
|
825
|
+
twin_data.ml_scale[i_bin, :] = trans.D(res.x)
|
|
761
826
|
refpar = "S"
|
|
762
827
|
if 1:
|
|
763
828
|
for cyc_s in range(1):
|
|
764
829
|
x0 = trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])
|
|
830
|
+
if 0:
|
|
831
|
+
h = 1e-1
|
|
832
|
+
f00 = target([x0])
|
|
833
|
+
g00 = grad([x0])
|
|
834
|
+
xx = x0 + h
|
|
835
|
+
f01 = target([xx])
|
|
836
|
+
nder = (f01 - f00) / h
|
|
837
|
+
logger.writeln(f"DEBUG_der_S bin_{i_bin} ad={g00} nd={nder} r={g00/nder}")
|
|
838
|
+
|
|
765
839
|
f0 = target([x0])
|
|
766
840
|
Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
|
|
767
841
|
nfev_total += 1
|
|
768
|
-
|
|
769
|
-
|
|
842
|
+
if twin_data:
|
|
843
|
+
shift = mltwin_shift_S(hkldata.df, twin_data, Ds, trans.S(x0), k_ani, idxes, i_bin)
|
|
844
|
+
else:
|
|
845
|
+
calc_shift_S = mli_shift_S if use_int else mlf_shift_S
|
|
846
|
+
shift = calc_shift_S(hkldata.df, fc_labs, Ds, trans.S(x0), k_ani, idxes)
|
|
770
847
|
shift /= trans.S_deriv(x0)
|
|
771
848
|
if abs(shift) < 1e-3: break
|
|
772
849
|
for itry in range(10):
|
|
@@ -787,6 +864,8 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
787
864
|
else:
|
|
788
865
|
#print("all bad")
|
|
789
866
|
break
|
|
867
|
+
if twin_data:
|
|
868
|
+
twin_data.ml_sigma[i_bin] = hkldata.binned_df.loc[i_bin, "S"]
|
|
790
869
|
else:
|
|
791
870
|
# somehow this does not work well.
|
|
792
871
|
x0 = [trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])]
|
|
@@ -796,6 +875,8 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
796
875
|
#print(i_bin, "mini cycle", ids, refpar)
|
|
797
876
|
#print(res)
|
|
798
877
|
hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
|
|
878
|
+
if twin_data:
|
|
879
|
+
twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
|
|
799
880
|
vals_now.append(hkldata.binned_df.loc[i_bin, "S"])
|
|
800
881
|
vals_now = numpy.array(vals_now)
|
|
801
882
|
if vals_last is not None and numpy.all(numpy.abs((vals_last - vals_now) / vals_now) < 1e-2):
|
|
@@ -812,17 +893,30 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
812
893
|
for i, lab in enumerate(D_labs):
|
|
813
894
|
hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
|
|
814
895
|
hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
|
|
896
|
+
if twin_data:
|
|
897
|
+
twin_data.ml_scale[i_bin, :] = trans.D(res.x[:-1])
|
|
898
|
+
twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
|
|
815
899
|
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
900
|
+
if twin_data:
|
|
901
|
+
dfc = numpy.abs(twin_data.f_calc) * twin_data.ml_scale_array()
|
|
902
|
+
for i_bin, idxes in hkldata.binned():
|
|
903
|
+
dfc_bin = dfc[numpy.asarray(twin_data.bin)==i_bin,:]
|
|
904
|
+
mean_dfc = numpy.nanmean(dfc_bin, axis=0)
|
|
905
|
+
for i, (dlab, fclab) in enumerate(zip(D_labs, fc_labs)):
|
|
906
|
+
hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc[i]
|
|
907
|
+
else:
|
|
908
|
+
for i_bin, idxes in hkldata.binned():
|
|
909
|
+
for dlab, fclab in zip(D_labs, fc_labs):
|
|
910
|
+
mean_dfc = numpy.nanmean(numpy.abs(hkldata.binned_df[dlab][i_bin] * hkldata.df[fclab][idxes]))
|
|
911
|
+
hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc
|
|
820
912
|
|
|
821
913
|
logger.writeln("Refined estimates:")
|
|
822
914
|
logger.writeln(hkldata.binned_df.to_string())
|
|
915
|
+
#numpy.testing.assert_allclose(hkldata.binned_df.S, twin_data.ml_sigma)
|
|
916
|
+
#numpy.testing.assert_allclose(hkldata.binned_df[D_labs], twin_data.ml_scale)
|
|
823
917
|
logger.writeln("time: {:.1f} sec ({} evaluations)".format(time.time() - t0, nfev_total))
|
|
824
918
|
|
|
825
|
-
if not use_int:
|
|
919
|
+
if not use_int or twin_data:
|
|
826
920
|
break # did not implement MLF B_aniso optimization
|
|
827
921
|
|
|
828
922
|
# Refine b_aniso
|
|
@@ -900,7 +994,7 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
|
|
|
900
994
|
return b_aniso
|
|
901
995
|
# determine_ml_params()
|
|
902
996
|
|
|
903
|
-
def smooth_params(hkldata, D_labs, smoothing):
|
|
997
|
+
def smooth_params(hkldata, D_labs, smoothing): # XXX twin_data
|
|
904
998
|
if smoothing is None or len(hkldata.binned()) < 2:
|
|
905
999
|
for i, lab in enumerate(D_labs + ["S"]):
|
|
906
1000
|
hkldata.df[lab] = hkldata.binned_data_as_array(lab)
|
|
@@ -982,32 +1076,71 @@ def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections
|
|
|
982
1076
|
hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]
|
|
983
1077
|
# calculate_maps_int()
|
|
984
1078
|
|
|
1079
|
+
def calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_and_selections, use="all"):
|
|
1080
|
+
k_ani2_inv = 1 / hkldata.debye_waller_factors(b_cart=b_aniso)**2
|
|
1081
|
+
Io = hkldata.df.I.to_numpy(copy=True) * k_ani2_inv
|
|
1082
|
+
sigIo = hkldata.df.SIGI.to_numpy(copy=True) * k_ani2_inv
|
|
1083
|
+
# Mask Io
|
|
1084
|
+
for i_bin, idxes in hkldata.binned():
|
|
1085
|
+
for c, work, test in centric_and_selections[i_bin]:
|
|
1086
|
+
if use != "all":
|
|
1087
|
+
tohide = test if use == "work" else work
|
|
1088
|
+
Io[tohide] = numpy.nan
|
|
1089
|
+
|
|
1090
|
+
twin_data.est_f_true(Io, sigIo)
|
|
1091
|
+
F_true = numpy.asarray(twin_data.f_true_max)
|
|
1092
|
+
Ds = twin_data.ml_scale_array()
|
|
1093
|
+
DFc = (twin_data.f_calc * Ds).sum(axis=1)
|
|
1094
|
+
exp_ip = numpy.exp(numpy.angle(DFc)*1j)
|
|
1095
|
+
Ft = numpy.asarray(twin_data.f_true_max)
|
|
1096
|
+
m = twin_data.calc_fom()
|
|
1097
|
+
fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
|
|
1098
|
+
2 * m * Ft * exp_ip - DFc, m * Ft * exp_ip)
|
|
1099
|
+
delfwt = m * Ft * exp_ip - DFc
|
|
1100
|
+
sel = numpy.isnan(fwt)
|
|
1101
|
+
fwt[sel] = DFc[sel]
|
|
1102
|
+
|
|
1103
|
+
hkldata2 = utils.hkl.HklData(hkldata.cell, hkldata.sg,
|
|
1104
|
+
utils.hkl.df_from_twin_data(twin_data, fc_labs))
|
|
1105
|
+
hkldata2.df["FWT"] = fwt
|
|
1106
|
+
hkldata2.df["DELFWT"] = delfwt
|
|
1107
|
+
hkldata2.df["FOM"] = m
|
|
1108
|
+
hkldata2.df["F_est"] = F_true
|
|
1109
|
+
hkldata2.df["FC"] = twin_data.f_calc.sum(axis=1)
|
|
1110
|
+
hkldata2.df["DFC"] = DFc
|
|
1111
|
+
hkldata2.df[D_labs] = Ds
|
|
1112
|
+
hkldata2.df["S"] = twin_data.ml_sigma_array()
|
|
1113
|
+
return hkldata2
|
|
1114
|
+
# calculate_maps_twin()
|
|
1115
|
+
|
|
985
1116
|
def merge_models(sts): # simply merge models. no fix in chain ids etc.
|
|
986
|
-
|
|
987
|
-
del
|
|
1117
|
+
st2 = sts[0].clone()
|
|
1118
|
+
del st2[:]
|
|
988
1119
|
model = gemmi.Model("1")
|
|
989
1120
|
for st in sts:
|
|
990
1121
|
for m in st:
|
|
991
1122
|
for c in m:
|
|
992
1123
|
model.add_chain(c)
|
|
993
|
-
|
|
994
|
-
return
|
|
1124
|
+
st2.add_model(model)
|
|
1125
|
+
return st2
|
|
995
1126
|
# merge_models()
|
|
996
1127
|
|
|
997
|
-
def decide_mtz_labels(mtz, find_free=True):
|
|
1128
|
+
def decide_mtz_labels(mtz, find_free=True, require=None):
|
|
1129
|
+
# F is preferred for now by default
|
|
1130
|
+
obs_types = ("F", "J", "G", "K")
|
|
1131
|
+
if require:
|
|
1132
|
+
assert set(require).issubset(obs_types)
|
|
1133
|
+
else:
|
|
1134
|
+
require = obs_types
|
|
998
1135
|
dlabs = utils.hkl.mtz_find_data_columns(mtz)
|
|
999
1136
|
logger.writeln("Finding possible options from MTZ:")
|
|
1000
1137
|
for typ in dlabs:
|
|
1001
1138
|
for labs in dlabs[typ]:
|
|
1002
1139
|
logger.writeln(" --labin '{}'".format(",".join(labs)))
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
elif dlabs["G"]:
|
|
1008
|
-
labin = dlabs["G"][0]
|
|
1009
|
-
elif dlabs["K"]:
|
|
1010
|
-
labin = dlabs["K"][0]
|
|
1140
|
+
for typ in require:
|
|
1141
|
+
if dlabs[typ]:
|
|
1142
|
+
labin = dlabs[typ][0]
|
|
1143
|
+
break
|
|
1011
1144
|
else:
|
|
1012
1145
|
raise RuntimeError("Data not found from mtz")
|
|
1013
1146
|
if find_free:
|
|
@@ -1019,7 +1152,8 @@ def decide_mtz_labels(mtz, find_free=True):
|
|
|
1019
1152
|
# decide_mtz_labels()
|
|
1020
1153
|
|
|
1021
1154
|
def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=None,
|
|
1022
|
-
n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False
|
|
1155
|
+
n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False,
|
|
1156
|
+
allow_unusual_occupancies=False):
|
|
1023
1157
|
if labin: assert 1 < len(labin) < 6
|
|
1024
1158
|
assert use in ("all", "work", "test")
|
|
1025
1159
|
assert n_bins or n_per_bin #if n_bins not set, n_per_bin should be given
|
|
@@ -1042,6 +1176,9 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1042
1176
|
st, mtz = utils.fileio.read_small_molecule_files([hklin, xyzins[0]])
|
|
1043
1177
|
sts = [st]
|
|
1044
1178
|
|
|
1179
|
+
for st in sts:
|
|
1180
|
+
utils.model.check_occupancies(st, raise_error=not allow_unusual_occupancies)
|
|
1181
|
+
|
|
1045
1182
|
if not labin:
|
|
1046
1183
|
labin = decide_mtz_labels(mtz)
|
|
1047
1184
|
col_types = {x.label:x.type for x in mtz.columns}
|
|
@@ -1136,13 +1273,7 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1136
1273
|
hkldata.setup_binning(n_bins=n_bins)
|
|
1137
1274
|
logger.writeln("Data completeness: {:.2f}%".format(hkldata.completeness()*100.))
|
|
1138
1275
|
|
|
1139
|
-
fc_labs = []
|
|
1140
|
-
for i, st in enumerate(sts):
|
|
1141
|
-
lab = "FC{}".format(i)
|
|
1142
|
-
hkldata.df[lab] = utils.model.calc_fc_fft(st, d_min-1e-6,
|
|
1143
|
-
source=source, mott_bethe=(source=="electron"),
|
|
1144
|
-
miller_array=hkldata.miller_array())
|
|
1145
|
-
fc_labs.append(lab)
|
|
1276
|
+
fc_labs = ["FC{}".format(i) for i, _ in enumerate(sts)]
|
|
1146
1277
|
|
|
1147
1278
|
# Create a centric selection table for faster look up
|
|
1148
1279
|
centric_and_selections = {}
|
|
@@ -1195,10 +1326,34 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1195
1326
|
return hkldata, sts, fc_labs, centric_and_selections, free
|
|
1196
1327
|
# process_input()
|
|
1197
1328
|
|
|
1329
|
+
def update_fc(st_list, fc_labs, d_min, monlib, source, mott_bethe, hkldata=None, twin_data=None):
|
|
1330
|
+
#assert (hkldata, twin_data).count(None) == 1
|
|
1331
|
+
# hkldata not updated when twin_data is given
|
|
1332
|
+
for i, st in enumerate(st_list):
|
|
1333
|
+
if st.ncs:
|
|
1334
|
+
st = st.clone()
|
|
1335
|
+
st.expand_ncs(gemmi.HowToNameCopiedChain.Dup, merge_dist=0)
|
|
1336
|
+
if twin_data:
|
|
1337
|
+
hkl = twin_data.asu
|
|
1338
|
+
else:
|
|
1339
|
+
hkl = hkldata.miller_array()
|
|
1340
|
+
fc = utils.model.calc_fc_fft(st, d_min - 1e-6,
|
|
1341
|
+
monlib=monlib,
|
|
1342
|
+
source=source,
|
|
1343
|
+
mott_bethe=mott_bethe,
|
|
1344
|
+
miller_array=hkl)
|
|
1345
|
+
if twin_data:
|
|
1346
|
+
twin_data.f_calc[:,i] = fc
|
|
1347
|
+
else:
|
|
1348
|
+
hkldata.df[fc_labs[i]] = fc
|
|
1349
|
+
if not twin_data:
|
|
1350
|
+
hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
|
|
1351
|
+
# update_fc()
|
|
1352
|
+
|
|
1198
1353
|
def calc_Fmask(st, d_min, miller_array):
|
|
1199
1354
|
logger.writeln("Calculating solvent contribution..")
|
|
1200
1355
|
grid = gemmi.FloatGrid()
|
|
1201
|
-
grid.setup_from(st, spacing=min(0.6, d_min / 2 - 1e-9))
|
|
1356
|
+
grid.setup_from(st, spacing=min(0.6, (d_min-1e-6) / 2 - 1e-9))
|
|
1202
1357
|
masker = gemmi.SolventMasker(gemmi.AtomicRadiiSet.Refmac)
|
|
1203
1358
|
masker.put_mask_on_float_grid(grid, st[0])
|
|
1204
1359
|
fmask_gr = gemmi.transform_map_to_f_phi(grid)
|
|
@@ -1206,29 +1361,44 @@ def calc_Fmask(st, d_min, miller_array):
|
|
|
1206
1361
|
return Fmask
|
|
1207
1362
|
# calc_Fmask()
|
|
1208
1363
|
|
|
1209
|
-
def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None, func_type="log_cosh"):
|
|
1210
|
-
|
|
1364
|
+
def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None, func_type="log_cosh", twin_data=None):
|
|
1365
|
+
# fc_labs must have solvent part at the end
|
|
1366
|
+
miller_array = twin_data.asu if twin_data else hkldata.miller_array()
|
|
1367
|
+
d_min = twin_data.d_min(sts[0].cell) if twin_data else hkldata.d_min_max()[0]
|
|
1211
1368
|
if use_solvent:
|
|
1212
1369
|
if mask is None:
|
|
1213
|
-
Fmask = calc_Fmask(merge_models(sts),
|
|
1370
|
+
Fmask = calc_Fmask(merge_models(sts), d_min, miller_array)
|
|
1214
1371
|
else:
|
|
1215
1372
|
fmask_gr = gemmi.transform_map_to_f_phi(mask)
|
|
1216
|
-
Fmask = fmask_gr.get_value_by_hkl(
|
|
1217
|
-
|
|
1373
|
+
Fmask = fmask_gr.get_value_by_hkl(miller_array)
|
|
1374
|
+
if twin_data:
|
|
1375
|
+
fc_sum = twin_data.f_calc[:,:-1].sum(axis=1)
|
|
1376
|
+
else:
|
|
1377
|
+
fc_sum = hkldata.df[fc_labs[:-1]].sum(axis=1).to_numpy()
|
|
1378
|
+
fc_list = [fc_sum, Fmask]
|
|
1379
|
+
else:
|
|
1380
|
+
if twin_data:
|
|
1381
|
+
fc_list = [twin_data.f_calc.sum(axis=1)]
|
|
1382
|
+
else:
|
|
1383
|
+
fc_list = [hkldata.df[fc_labs].sum(axis=1).to_numpy()]
|
|
1218
1384
|
|
|
1219
1385
|
scaling = LsqScale(func_type=func_type)
|
|
1220
|
-
scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0)
|
|
1386
|
+
scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0, twin_data=twin_data)
|
|
1221
1387
|
scaling.scale()
|
|
1222
1388
|
b_iso = scaling.b_iso
|
|
1223
|
-
k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
|
|
1224
1389
|
k_aniso = hkldata.debye_waller_factors(b_cart=scaling.b_aniso)
|
|
1225
1390
|
hkldata.df["k_aniso"] = k_aniso # we need it later when calculating stats
|
|
1226
1391
|
|
|
1227
1392
|
if use_solvent:
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1393
|
+
if twin_data:
|
|
1394
|
+
s2 = numpy.asarray(twin_data.s2_array)
|
|
1395
|
+
else:
|
|
1396
|
+
s2 = 1. / hkldata.d_spacings().to_numpy()**2
|
|
1397
|
+
Fbulk = Fmask * scaling.get_solvent_scale(scaling.k_sol, scaling.b_sol, s2)
|
|
1398
|
+
if twin_data:
|
|
1399
|
+
twin_data.f_calc[:,-1] = Fbulk
|
|
1400
|
+
else:
|
|
1401
|
+
hkldata.df[fc_labs[-1]] = Fbulk
|
|
1232
1402
|
|
|
1233
1403
|
# Apply scales
|
|
1234
1404
|
if use_int:
|
|
@@ -1238,9 +1408,13 @@ def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int
|
|
|
1238
1408
|
else:
|
|
1239
1409
|
o_labs = ["FP", "SIGFP", "F(+)","SIGF(+)", "F(-)", "SIGF(-)"]
|
|
1240
1410
|
hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1411
|
+
if twin_data:
|
|
1412
|
+
twin_data.f_calc[:] *= twin_data.debye_waller_factors(b_iso=b_iso)[:,None]
|
|
1413
|
+
else:
|
|
1414
|
+
k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
|
|
1415
|
+
for lab in fc_labs: hkldata.df[lab] *= k_iso
|
|
1416
|
+
# total Fc
|
|
1417
|
+
hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
|
|
1244
1418
|
return scaling
|
|
1245
1419
|
# bulk_solvent_and_lsq_scales()
|
|
1246
1420
|
|
|
@@ -1357,6 +1531,16 @@ def main(args):
|
|
|
1357
1531
|
except RuntimeError as e:
|
|
1358
1532
|
raise SystemExit("Error: {}".format(e))
|
|
1359
1533
|
|
|
1534
|
+
if args.twin:
|
|
1535
|
+
twin_data = find_twin_domains_from_data(hkldata)
|
|
1536
|
+
else:
|
|
1537
|
+
twin_data = None
|
|
1538
|
+
if twin_data:
|
|
1539
|
+
twin_data.setup_f_calc(len(sts) + (0 if args.no_solvent else 1))
|
|
1540
|
+
|
|
1541
|
+
update_fc(sts, fc_labs, d_min=hkldata.d_min_max()[0], monlib=None,
|
|
1542
|
+
source=args.source, mott_bethe=(args.source=="electron"),
|
|
1543
|
+
hkldata=hkldata, twin_data=twin_data)
|
|
1360
1544
|
is_int = "I" in hkldata.df
|
|
1361
1545
|
|
|
1362
1546
|
if args.mask:
|
|
@@ -1366,46 +1550,70 @@ def main(args):
|
|
|
1366
1550
|
|
|
1367
1551
|
# Overall scaling & bulk solvent
|
|
1368
1552
|
# FP/SIGFP will be scaled. Total FC will be added.
|
|
1553
|
+
if not args.no_solvent:
|
|
1554
|
+
fc_labs.append("Fbulk")
|
|
1369
1555
|
lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
|
|
1370
|
-
use_int=is_int, mask=mask)
|
|
1556
|
+
use_int=is_int, mask=mask, twin_data=twin_data)
|
|
1371
1557
|
b_aniso = lsq.b_aniso
|
|
1372
1558
|
# stats
|
|
1373
|
-
stats, overall = calc_r_and_cc(hkldata, centric_and_selections)
|
|
1559
|
+
stats, overall = calc_r_and_cc(hkldata, centric_and_selections, twin_data)
|
|
1374
1560
|
for lab in "R", "CC":
|
|
1375
1561
|
logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
|
|
1562
|
+
if is_int:
|
|
1563
|
+
logger.writeln("R1 is calculated for reflections with I/sigma>2.")
|
|
1564
|
+
|
|
1565
|
+
if twin_data:
|
|
1566
|
+
estimate_twin_fractions_from_model(twin_data, hkldata)
|
|
1567
|
+
#del hkldata.df["FC"]
|
|
1568
|
+
#del hkldata.df["Fbulk"]
|
|
1569
|
+
# Need to redo scaling?
|
|
1570
|
+
lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
|
|
1571
|
+
use_int=is_int, mask=mask, twin_data=twin_data)
|
|
1572
|
+
b_aniso = lsq.b_aniso
|
|
1573
|
+
stats, overall = calc_r_and_cc(hkldata, centric_and_selections, twin_data)
|
|
1574
|
+
for lab in "R", "CC":
|
|
1575
|
+
logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
|
|
1376
1576
|
|
|
1377
1577
|
# Estimate ML parameters
|
|
1378
1578
|
D_labs = ["D{}".format(i) for i in range(len(fc_labs))]
|
|
1379
1579
|
|
|
1380
1580
|
if args.use_cc:
|
|
1381
1581
|
assert not is_int
|
|
1582
|
+
assert not args.twin
|
|
1382
1583
|
logger.writeln("Estimating sigma-A parameters from CC..")
|
|
1383
1584
|
determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, args.use)
|
|
1384
1585
|
else:
|
|
1385
|
-
b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, centric_and_selections, args.D_trans, args.S_trans, args.use
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1586
|
+
b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, centric_and_selections, args.D_trans, args.S_trans, args.use,
|
|
1587
|
+
twin_data=twin_data)
|
|
1588
|
+
use = {"all": "all", "work": "work", "test": "work"}[args.use]
|
|
1589
|
+
if twin_data:
|
|
1590
|
+
# replace hkldata
|
|
1591
|
+
hkldata = calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_and_selections, use)
|
|
1592
|
+
elif is_int:
|
|
1593
|
+
calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections, use)
|
|
1389
1594
|
else:
|
|
1390
1595
|
log_out = "{}.log".format(args.output_prefix)
|
|
1391
|
-
calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out,
|
|
1392
|
-
use={"all": "all", "work": "work", "test": "work"}[args.use])
|
|
1596
|
+
calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out, use)
|
|
1393
1597
|
|
|
1394
1598
|
# Write mtz file
|
|
1395
|
-
if
|
|
1396
|
-
labs = ["
|
|
1599
|
+
if twin_data:
|
|
1600
|
+
labs = ["F_est"]
|
|
1601
|
+
elif is_int:
|
|
1602
|
+
labs = ["I", "SIGI"]
|
|
1397
1603
|
else:
|
|
1398
|
-
labs = ["FP", "SIGFP"
|
|
1399
|
-
labs.extend(["FWT", "DELFWT", "FC", "DFC"])
|
|
1604
|
+
labs = ["FP", "SIGFP"]
|
|
1605
|
+
labs.extend(["FOM", "FWT", "DELFWT", "FC", "DFC"])
|
|
1400
1606
|
if "FAN" in hkldata.df:
|
|
1401
1607
|
labs.append("FAN")
|
|
1402
1608
|
if not args.no_solvent:
|
|
1403
1609
|
labs.append("Fbulk")
|
|
1404
1610
|
if "FREE" in hkldata.df:
|
|
1405
1611
|
labs.append("FREE")
|
|
1612
|
+
if "F_true_est" in hkldata.df:
|
|
1613
|
+
labs.append("F_true_est")
|
|
1406
1614
|
labs += D_labs + ["S"]
|
|
1407
1615
|
mtz_out = args.output_prefix+".mtz"
|
|
1408
|
-
hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q"})
|
|
1616
|
+
hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q", "F_est": "F"})
|
|
1409
1617
|
return hkldata
|
|
1410
1618
|
# main()
|
|
1411
1619
|
if __name__ == "__main__":
|