servalcat 0.4.72__cp312-cp312-macosx_11_0_arm64.whl → 0.4.99__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- servalcat/__init__.py +2 -2
- servalcat/ext.cpython-312-darwin.so +0 -0
- servalcat/refine/refine.py +152 -67
- servalcat/refine/refine_geom.py +32 -13
- servalcat/refine/refine_spa.py +70 -40
- servalcat/refine/refine_xtal.py +45 -13
- servalcat/refine/spa.py +15 -4
- servalcat/refine/xtal.py +147 -98
- servalcat/refmac/exte.py +7 -5
- servalcat/refmac/refmac_keywords.py +11 -9
- servalcat/refmac/refmac_wrapper.py +87 -60
- servalcat/spa/fofc.py +20 -3
- servalcat/spa/fsc.py +11 -11
- servalcat/spa/run_refmac.py +27 -12
- servalcat/spa/translate.py +2 -2
- servalcat/utils/commands.py +154 -4
- servalcat/utils/fileio.py +20 -10
- servalcat/utils/hkl.py +43 -29
- servalcat/utils/logger.py +25 -1
- servalcat/utils/maps.py +2 -2
- servalcat/utils/model.py +23 -10
- servalcat/utils/refmac.py +20 -1
- servalcat/utils/restraints.py +34 -25
- servalcat/utils/symmetry.py +5 -5
- servalcat/xtal/french_wilson.py +39 -31
- servalcat/xtal/sigmaa.py +382 -152
- servalcat/xtal/twin.py +121 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/METADATA +4 -4
- servalcat-0.4.99.dist-info/RECORD +45 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/WHEEL +1 -1
- servalcat-0.4.72.dist-info/RECORD +0 -44
- {servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/entry_points.txt +0 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/licenses/LICENSE +0 -0
servalcat/xtal/sigmaa.py
CHANGED
@@ -17,6 +17,7 @@ import scipy.optimize
 from servalcat.utils import logger
 from servalcat import utils
 from servalcat import ext
+from servalcat.xtal.twin import find_twin_domains_from_data, estimate_twin_fractions_from_model
 
 """
 DFc = sum_j D_j F_c,j
@@ -29,6 +30,8 @@ def add_arguments(parser):
     parser.description = 'Sigma-A parameter estimation for crystallographic data'
     parser.add_argument('--hklin', required=True,
                         help='Input MTZ file')
+    parser.add_argument('--spacegroup',
+                        help='Override space group')
     parser.add_argument('--labin',
                         help='MTZ column for F,SIGF,FREE')
     parser.add_argument('--free', type=int,
@@ -51,6 +54,7 @@ def add_arguments(parser):
                         help="Use CC(|F1|,|F2|) to CC(F1,F2) conversion to derive D and S")
     parser.add_argument('--use', choices=["all", "work", "test"], default="all",
                         help="Which reflections to be used for the parameter estimate.")
+    parser.add_argument('--twin', action="store_true", help="Turn on twin refinement")
     parser.add_argument('--mask',
                         help="A solvent mask (by default calculated from the coordinates)")
     parser.add_argument('--keep_charges', action='store_true',
@@ -71,22 +75,28 @@ def nanaverage(cc, w):
         return numpy.nan
     return numpy.average(cc[sel], weights=w[sel])
 
-def calc_r_and_cc(hkldata, centric_and_selections):
+def calc_r_and_cc(hkldata, centric_and_selections, twin_data=None):
     has_int = "I" in hkldata.df
     has_free = "FREE" in hkldata.df
     stats = hkldata.binned_df.copy()
-    stats["n_obs"] = 0
+    stats[["n_obs", "n_all"]] = 0
     if has_free:
         stats[["n_work", "n_free"]] = 0
-    rlab = "
+    rlab = "R1" if has_int else "R"
     cclab = "CCI" if has_int else "CCF"
-
+    if twin_data:
+        Fc = numpy.sqrt(twin_data.i_calc_twin())
+    else:
+        Fc = numpy.abs(hkldata.df.FC * hkldata.df.k_aniso)
     if has_int:
         obs = hkldata.df.I
+        obs_sqrt = numpy.sqrt(numpy.maximum(0, hkldata.df.I))
+        obs_sqrt[hkldata.df.I/hkldata.df.SIGI < 2] = numpy.nan # SHELX equivalent
         calc = Fc**2
+        calc_sqrt = Fc
     else:
-        obs = hkldata.df.FP
-        calc = Fc
+        obs = obs_sqrt = hkldata.df.FP
+        calc = calc_sqrt = Fc
     if has_free:
         for lab in (cclab, rlab):
             for suf in ("work", "free"):
@@ -97,15 +107,16 @@ def calc_r_and_cc(hkldata, centric_and_selections):
 
     for i_bin, idxes in hkldata.binned():
         stats.loc[i_bin, "n_obs"] = numpy.sum(numpy.isfinite(obs[idxes]))
+        stats.loc[i_bin, "n_all"] = len(idxes)
         if has_free:
             for j, suf in ((1, "work"), (2, "free")):
                 idxes2 = numpy.concatenate([sel[j] for sel in centric_and_selections[i_bin]])
                 stats.loc[i_bin, "n_"+suf] = numpy.sum(numpy.isfinite(obs[idxes2]))
                 stats.loc[i_bin, cclab+suf] = utils.hkl.correlation(obs[idxes2], calc[idxes2])
-                stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(
+                stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes2], calc_sqrt[idxes2])
         else:
             stats.loc[i_bin, cclab] = utils.hkl.correlation(obs[idxes], calc[idxes])
-            stats.loc[i_bin, rlab] = utils.hkl.r_factor(
+            stats.loc[i_bin, rlab] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
 
     # Overall
     ret = {}
@@ -114,7 +125,7 @@ def calc_r_and_cc(hkldata, centric_and_selections):
             ret[cclab+suf+"avg"] = nanaverage(stats[cclab+suf], stats["n_"+suf])
         for j, suf in ((1, "work"), (2, "free")):
             idxes = numpy.concatenate([sel[j] for i_bin, _ in hkldata.binned() for sel in centric_and_selections[i_bin]])
-            ret[rlab+suf] = utils.hkl.r_factor(
+            ret[rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
     else:
         ret[cclab+"avg"] = nanaverage(stats[cclab], stats["n_obs"])
         ret[rlab] = utils.hkl.r_factor(obs, calc)
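Note on the R statistic above: for intensity data the residual reported is now R1 computed on amplitudes, with reflections below I/SIGI = 2 masked out (the SHELX convention mentioned in the added comment). A minimal standalone sketch of that convention follows, assuming `utils.hkl.r_factor` implements the usual R = sum(| |Fo| - |Fc| |) / sum(|Fo|); the function name and cutoff argument here are illustrative, not part of servalcat.

```python
import numpy

def r1_shelx_style(I, sigI, Fc, cutoff=2.0):
    """R1 on sqrt(I) using only reflections with I/sigI >= cutoff (illustrative sketch)."""
    Fo = numpy.sqrt(numpy.maximum(0, I))
    Fo[I / sigI < cutoff] = numpy.nan      # mask weak reflections, as in the diff
    sel = numpy.isfinite(Fo)
    return numpy.sum(numpy.abs(Fo[sel] - Fc[sel])) / numpy.sum(Fo[sel])
```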
@@ -158,46 +169,63 @@ class LsqScale:
         self.b_aniso = None
         self.stats = {}
 
-    def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None):
+    def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None, twin_data=None):
         assert 0 < len(fc_list) < 3
         self.use_int = use_int
         if sigma_cutoff is not None:
             if use_int:
-                sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
+                self.sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
                 self.labcut = "(I/SIGI>{})".format(sigma_cutoff)
             else:
-                sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
+                self.sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
                 self.labcut = "(F/SIGF>{})".format(sigma_cutoff)
         else:
-            sel = hkldata.df.index
+            self.sel = hkldata.df.index
             self.labcut = ""
-        self.obs = hkldata.df["I" if use_int else "FP"].to_numpy()
-        self.
-        self.
-        self.
+        self.obs = hkldata.df["I" if use_int else "FP"].to_numpy(copy=True)
+        self.obs[~self.sel] = numpy.nan
+        self.calc = [x for x in fc_list]
+        self.s2mat = hkldata.ssq_mat()
+        self.s2 = 1. / hkldata.d_spacings().to_numpy()**2
         self.adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=False)
+        self.twin_data = twin_data
         if use_int:
             self.sqrt_obs = numpy.sqrt(self.obs)
 
     def get_solvent_scale(self, k_sol, b_sol, s2=None):
         if s2 is None: s2 = self.s2
         return k_sol * numpy.exp(-b_sol * s2 / 4)
-
-    def
+
+    def fc_and_mask_grad(self, x):
         fc0 = self.calc[0]
         if len(self.calc) == 2:
-
-
-
+            if self.twin_data:
+                r = self.twin_data.scaling_fc_and_mask_grad(self.calc[1], x[-2], x[-1])
+                return r[:,0], r[:,1], r[:,2]
+            else:
+                fmask = self.calc[1]
+                temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
+                fbulk = x[-2] * temp_sol * fmask
+                fc = fc0 + fbulk
+                re_fmask_fcconj = (fmask * fc.conj()).real
+                fc_abs = numpy.abs(fc)
+                tmp = temp_sol / fc_abs * re_fmask_fcconj
+                return fc_abs, tmp, -tmp * x[-2] * self.s2 / 4
         else:
-
+            if self.twin_data:
+                return numpy.sqrt(self.twin_data.i_calc_twin()), None, None
+            else:
+                return numpy.abs(fc0), None, None
+
+    def scaled_fc(self, x):
+        fc = self.fc_and_mask_grad(x)[0]
         nadp = self.adpdirs.shape[0]
         B = numpy.dot(x[1:nadp+1], self.adpdirs)
         kani = numpy.exp(numpy.dot(-B, self.s2mat))
         return self.k_trans(x[0]) * kani * fc
 
     def target(self, x):
-        y =
+        y = self.scaled_fc(x)
         if self.use_int:
             diff = self.sqrt_obs - y
             #y2 = y**2
@@ -214,18 +242,10 @@ class LsqScale:
 
     def grad(self, x):
         g = numpy.zeros_like(x)
-
-        if len(self.calc) == 2:
-            fmask = self.calc[1]
-            temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
-            fbulk = x[-2] * temp_sol * fmask
-            fc = fc0 + fbulk
-        else:
-            fc = fc0
+        fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
         nadp = self.adpdirs.shape[0]
         B = numpy.dot(x[1:nadp+1], self.adpdirs)
         kani = numpy.exp(numpy.dot(-B, self.s2mat))
-        fc_abs = numpy.abs(fc)
         k = self.k_trans(x[0])
         y = k * kani * fc_abs
         if self.use_int:
@@ -247,30 +267,19 @@ class LsqScale:
         g[0] = numpy.nansum(kani * fc_abs * dfdy * self.k_trans_der(x[0]))
         g[1:nadp+1] = numpy.dot(dfdb, self.adpdirs.T)
         if len(self.calc) == 2:
-
-
-            g[-2] = numpy.nansum(tmp * dfdy)
-            g[-1] = numpy.nansum(-tmp * dfdy * x[-2] * self.s2 / 4)
+            g[-2] = numpy.nansum(k * kani * der_ksol * dfdy)
+            g[-1] = numpy.nansum(k * kani * der_bsol * dfdy)
 
         return g
 
     def calc_shift(self, x):
         # TODO: sort out code duplication, if we use this.
-        g = numpy.zeros((len(self.
+        g = numpy.zeros((len(self.obs), len(x)))
         H = numpy.zeros((len(x), len(x)))
-
-        fc0 = self.calc[0]
-        if len(self.calc) == 2:
-            fmask = self.calc[1]
-            temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
-            fbulk = x[-2] * temp_sol * fmask
-            fc = fc0 + fbulk
-        else:
-            fc = fc0
+        fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
         nadp = self.adpdirs.shape[0]
         B = numpy.dot(x[1:nadp+1], self.adpdirs)
         kani = numpy.exp(numpy.dot(-B, self.s2mat))
-        fc_abs = numpy.abs(fc)
         k = self.k_trans(x[0])
         y = k * kani * fc_abs
         if self.use_int:
@@ -297,27 +306,20 @@ class LsqScale:
         g[:,0] = kani * fc_abs * self.k_trans_der(x[0])
         g[:,1:nadp+1] = numpy.dot(dfdb.T, self.adpdirs.T)
         if len(self.calc) == 2:
-
-
-            g[:,-2] = tmp
-            g[:,-1] = -tmp * x[-2] * self.s2 / 4
+            g[:,-2] = k * kani * der_ksol
+            g[:,-1] = k * kani * der_bsol
 
-        #
+        # no numpy.nandot..
+        g, dfdy, dfdy2 = g[self.sel, :], dfdy[self.sel], dfdy2[self.sel]
         H = numpy.dot(g.T, g * dfdy2[:,None])
         g = numpy.sum(dfdy[:,None] * g, axis=0)
         dx = -numpy.dot(g, numpy.linalg.pinv(H))
         return dx
 
     def initial_kb(self):
-
-
-
-            fbulk = self.get_solvent_scale(self.k_sol, self.b_sol) * fmask
-            fc = fc0 + fbulk
-        else:
-            fc = fc0
-        sel = self.obs > 0
-        f1p, f2p, s2p = self.obs[sel], numpy.abs(fc)[sel], self.s2[sel]
+        fc_abs = self.fc_and_mask_grad([self.k_sol, self.b_sol])[0]
+        sel = self.obs > 0 # exclude nan as well
+        f1p, f2p, s2p = self.obs[sel], fc_abs[sel], self.s2[sel]
         if self.use_int: f2p *= f2p
         tmp = numpy.log(f2p) - numpy.log(f1p)
         # g = [dT/dk, dT/db]
@@ -418,7 +420,7 @@ class LsqScale:
         self.k_sol = res_x[-2]
         self.b_sol = res_x[-1]
         logger.writeln(" k_sol= {:.2e} B_sol= {:.2e}".format(self.k_sol, self.b_sol))
-        calc =
+        calc = self.scaled_fc(res_x)
         if self.use_int: calc *= calc
         self.stats["cc"] = utils.hkl.correlation(self.obs, calc)
         self.stats["r"] = utils.hkl.r_factor(self.obs, calc)
@@ -510,6 +512,43 @@ def mli_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
     return -g / H
 # mli_shift_S()
 
+def mltwin_est_ftrue(twin_data, df, k_ani, idxes):
+    kani2_inv = 1 / k_ani**2
+    i_sigi = numpy.empty((2, len(df.index)))
+    i_sigi[:] = numpy.nan
+    i_sigi[0, idxes] = (df.I.to_numpy() * kani2_inv)[idxes]
+    i_sigi[1, idxes] = (df.SIGI.to_numpy() * kani2_inv)[idxes]
+    twin_data.est_f_true(i_sigi[0,:], i_sigi[1,:])
+# mltwin_est_ftrue()
+
+def mltwin(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+    twin_data.ml_sigma[i_bin] = S
+    twin_data.ml_scale[i_bin, :] = Ds
+    mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+    return twin_data.ll()
+# mltwin()
+
+def deriv_mltwin_wrt_D_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+    twin_data.ml_sigma[i_bin] = S
+    twin_data.ml_scale[i_bin, :] = Ds
+    mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+    r = twin_data.ll_der_D_S()
+    g = numpy.zeros(r.shape[1])
+    g[:-1] = numpy.nansum(r[:,:-1], axis=0) # D
+    g[-1] = numpy.nansum(r[:,-1]) # S
+    return g
+# deriv_mlf_wrt_D_S()
+
+def mltwin_shift_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+    twin_data.ml_sigma[i_bin] = S
+    twin_data.ml_scale[i_bin, :] = Ds
+    mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+    r = twin_data.ll_der_D_S()
+    g = numpy.nansum(r[:,-1])
+    H = numpy.nansum(r[:,-1]**2) # approximating expectation value of second derivative
+    return -g / H
+# mlf_shift_S()
+
 def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, use="all", smoothing="gauss"):
     # theorhetical values
     cc_a = lambda cc: (numpy.pi/4*(1-cc**2)**2 * scipy.special.hyp2f1(3/2, 3/2, 1, cc**2) - numpy.pi/4) / (1-numpy.pi/4)
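The new mltwin_shift_S above returns a one-parameter update -g/H in which H is approximated by the sum of squared per-reflection gradients (per the comment "approximating expectation value of second derivative"), which amounts to a Fisher-scoring-style step rather than an exact Newton step. A toy sketch of the same update rule on a generic per-observation log-likelihood gradient; it does not use the servalcat twin_data API:

```python
import numpy

def fisher_scoring_step(grad_per_obs):
    """One scalar-parameter update: shift = -g / H with H ~ E[(dl/dS)^2]."""
    g = numpy.nansum(grad_per_obs)        # total gradient of the log-likelihood
    H = numpy.nansum(grad_per_obs**2)     # curvature approximated by squared per-observation gradients
    return -g / H

# usage sketch: S_new = S + fisher_scoring_step(dl_dS_per_reflection)
```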
@@ -594,7 +633,7 @@ def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selection
     smooth_params(hkldata, D_labs, smoothing)
 # determine_mlf_params_from_cc()
 
-def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use):
+def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use, twin_data=None):
     # Initial values
     for lab in D_labs: hkldata.binned_df[lab] = 1.
     hkldata.binned_df["S"] = 10000.
@@ -614,8 +653,11 @@ def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selectio
             Io = hkldata.df.I.to_numpy()[idxes]
         else:
             Io = hkldata.df.FP.to_numpy()[idxes]**2
-        Io /= k_ani[idxes]**2
-
+        Io /= k_ani[idxes]**2
+        if twin_data:
+            Ic = twin_data.i_calc_twin()[idxes]
+        else:
+            Ic = numpy.abs(hkldata.df.FC.to_numpy()[idxes])**2
         mean_Io = numpy.mean(Io)
         mean_Ic = numpy.mean(Ic)
         cc = numpy.corrcoef(Io, Ic)[1,0]
@@ -635,16 +677,21 @@ def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selectio
             min_D = hkldata.binned_df[D_lab][hkldata.binned_df[D_lab] > 0].min() * 0.1
             logger.writeln("WARNING: negative {} is detected from initial estimates. Replacing it using minimum positive value {:.2e}".format(D_lab, min_D))
             hkldata.binned_df[D_lab].where(hkldata.binned_df[D_lab] > 0, min_D, inplace=True) # arbitrary
-
+
+    if twin_data:
+        twin_data.ml_scale[:] = hkldata.binned_df.loc[:, D_labs]
+        twin_data.ml_sigma[:] = hkldata.binned_df.loc[:, "S"]
+
     logger.writeln("Initial estimates:")
     logger.writeln(hkldata.binned_df.to_string())
 # initialize_ml_params()
 
 def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_selections,
-                        D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss"
+                        D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss",
+                        twin_data=None):
     assert use in ("all", "work", "test")
     assert smoothing in (None, "gauss")
-    logger.writeln("Estimating sigma-A parameters using {}..".format("intensities" if use_int else "amplitudes"))
+    logger.writeln("Estimating sigma-A parameters using {}..".format(("intensities" if use_int else "amplitudes") + " (twin)" if twin_data else ""))
     trans = VarTrans(D_trans, S_trans)
     lab_obs = "I" if use_int else "FP"
     def get_idxes(i_bin):
@@ -655,7 +702,7 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
         return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
 
     if not set(D_labs + ["S"]).issubset(hkldata.binned_df):
-        initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use)
+        initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use, twin_data=twin_data)
     for dlab, fclab in zip(D_labs, fc_labs):
         hkldata.binned_df["Mn(|{}*{}|)".format(dlab, fclab)] = numpy.nan
 
@@ -681,8 +728,12 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
            else:
                Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
                S = trans.S(x[-1])
-
-
+
+            if twin_data:
+                return mltwin(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
+            else:
+                f = mli if use_int else mlf
+                return f(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
 
         def grad(x):
             if refpar == "all":
@@ -697,8 +748,11 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
                Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
                S = trans.S(x[-1])
                n_par = 1
-
-
+            if twin_data:
+                r = deriv_mltwin_wrt_D_S(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
+            else:
+                calc_deriv = deriv_mli_wrt_D_S if use_int else deriv_mlf_wrt_D_S
+                r = calc_deriv(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
             g = numpy.zeros(n_par)
             if refpar in ("all", "D"):
                 g[:len(fc_labs)] = r[:len(fc_labs)]
@@ -723,6 +777,18 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
         for ids in range(10):
             refpar = "D"
             x0 = numpy.array([trans.D_inv(hkldata.binned_df.loc[i_bin, lab]) for lab in D_labs])
+            #print("MLTWIN=", target(x0))
+            #quit()
+            if 0:
+                h = 1e-3
+                f00 = target(x0)
+                g00 = grad(x0)
+                for ii in range(len(x0)):
+                    xx = x0.copy()
+                    xx[ii] += h
+                    f01 = target(xx)
+                    nder = (f01 - f00) / h
+                    logger.writeln(f"DEBUG_der_D bin_{i_bin} {ii} ad={g00[ii]} nd={nder} r={g00[ii]/nder}")
             vals_now = []
             if 0:
                 f0 = target(x0)
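The `if 0:` block added above is a disabled numerical check of the analytic gradient: it perturbs each parameter by h and compares the forward difference (f(x+h) - f(x)) / h against the value returned by grad. A standalone sketch of the same idea; `target` and `grad` here stand for any scalar function and its gradient, not specifically the servalcat objectives:

```python
import numpy

def check_gradient(target, grad, x0, h=1e-3):
    """Compare forward-difference derivatives against an analytic gradient."""
    f00, g00 = target(x0), grad(x0)
    for ii in range(len(x0)):
        xx = numpy.array(x0, dtype=float)
        xx[ii] += h
        nder = (target(xx) - f00) / h   # numerical derivative
        print(f"param {ii}: analytic={g00[ii]:.6g} numerical={nder:.6g} ratio={g00[ii]/nder:.3f}")
```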
@@ -758,15 +824,29 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
                for i, lab in enumerate(D_labs):
                    hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
                    vals_now.append(hkldata.binned_df.loc[i_bin, lab])
+                if twin_data:
+                    twin_data.ml_scale[i_bin, :] = trans.D(res.x)
            refpar = "S"
            if 1:
                for cyc_s in range(1):
                    x0 = trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])
+                    if 0:
+                        h = 1e-1
+                        f00 = target([x0])
+                        g00 = grad([x0])
+                        xx = x0 + h
+                        f01 = target([xx])
+                        nder = (f01 - f00) / h
+                        logger.writeln(f"DEBUG_der_S bin_{i_bin} ad={g00} nd={nder} r={g00/nder}")
+
                    f0 = target([x0])
                    Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
                    nfev_total += 1
-
-
+                    if twin_data:
+                        shift = mltwin_shift_S(hkldata.df, twin_data, Ds, trans.S(x0), k_ani, idxes, i_bin)
+                    else:
+                        calc_shift_S = mli_shift_S if use_int else mlf_shift_S
+                        shift = calc_shift_S(hkldata.df, fc_labs, Ds, trans.S(x0), k_ani, idxes)
                    shift /= trans.S_deriv(x0)
                    if abs(shift) < 1e-3: break
                    for itry in range(10):
@@ -787,6 +867,8 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
                    else:
                        #print("all bad")
                        break
+                if twin_data:
+                    twin_data.ml_sigma[i_bin] = hkldata.binned_df.loc[i_bin, "S"]
            else:
                # somehow this does not work well.
                x0 = [trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])]
@@ -796,6 +878,8 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
                #print(i_bin, "mini cycle", ids, refpar)
                #print(res)
                hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
+                if twin_data:
+                    twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
            vals_now.append(hkldata.binned_df.loc[i_bin, "S"])
            vals_now = numpy.array(vals_now)
            if vals_last is not None and numpy.all(numpy.abs((vals_last - vals_now) / vals_now) < 1e-2):
@@ -812,17 +896,30 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
        for i, lab in enumerate(D_labs):
            hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
        hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
+        if twin_data:
+            twin_data.ml_scale[i_bin, :] = trans.D(res.x[:-1])
+            twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
 
-
-
-
-
+    if twin_data:
+        dfc = numpy.abs(twin_data.f_calc) * twin_data.ml_scale_array()
+        for i_bin, idxes in hkldata.binned():
+            dfc_bin = dfc[numpy.asarray(twin_data.bin)==i_bin,:]
+            mean_dfc = numpy.nanmean(dfc_bin, axis=0)
+            for i, (dlab, fclab) in enumerate(zip(D_labs, fc_labs)):
+                hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc[i]
+    else:
+        for i_bin, idxes in hkldata.binned():
+            for dlab, fclab in zip(D_labs, fc_labs):
+                mean_dfc = numpy.nanmean(numpy.abs(hkldata.binned_df[dlab][i_bin] * hkldata.df[fclab][idxes]))
+                hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc
 
    logger.writeln("Refined estimates:")
    logger.writeln(hkldata.binned_df.to_string())
+    #numpy.testing.assert_allclose(hkldata.binned_df.S, twin_data.ml_sigma)
+    #numpy.testing.assert_allclose(hkldata.binned_df[D_labs], twin_data.ml_scale)
    logger.writeln("time: {:.1f} sec ({} evaluations)".format(time.time() - t0, nfev_total))
 
-    if not use_int:
+    if not use_int or twin_data:
        break # did not implement MLF B_aniso optimization
 
    # Refine b_aniso
@@ -900,7 +997,7 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
     return b_aniso
 # determine_ml_params()
 
-def smooth_params(hkldata, D_labs, smoothing):
+def smooth_params(hkldata, D_labs, smoothing): # XXX twin_data
     if smoothing is None or len(hkldata.binned()) < 2:
         for i, lab in enumerate(D_labs + ["S"]):
             hkldata.df[lab] = hkldata.binned_data_as_array(lab)
@@ -924,10 +1021,9 @@ def smooth_params(hkldata, D_labs, smoothing):
 # smooth_params()
 
 def expected_F_from_int(Io, sigIo, k_ani, DFc, eps, c, S):
-    if c == 0
-
-
-        k_num, k_den = 0., -0.5
+    k_num = numpy.repeat(0.5 if c == 0 else 0., Io.size) # 0.5 if acentric
+    k_den = k_num - 0.5
+    if numpy.isscalar(c): c = numpy.repeat(c, Io.size)
     to = Io / sigIo - sigIo / (c+1) / k_ani**2 / S / eps
     tf = k_ani * numpy.abs(DFc) / numpy.sqrt(sigIo)
     sig1 = k_ani**2 * S * eps / sigIo
@@ -982,32 +1078,80 @@ def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections
             hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]
 # calculate_maps_int()
 
+def calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_and_selections, use="all"):
+    k_ani2_inv = 1 / hkldata.debye_waller_factors(b_cart=b_aniso)**2
+    Io = hkldata.df.I.to_numpy(copy=True) * k_ani2_inv
+    sigIo = hkldata.df.SIGI.to_numpy(copy=True) * k_ani2_inv
+    # Mask Io
+    for i_bin, idxes in hkldata.binned():
+        for c, work, test in centric_and_selections[i_bin]:
+            if use != "all":
+                tohide = test if use == "work" else work
+                Io[tohide] = numpy.nan
+
+    twin_data.est_f_true(Io, sigIo)
+    Ds = twin_data.ml_scale_array()
+    DFc = (twin_data.f_calc * Ds).sum(axis=1)
+    exp_ip = numpy.exp(numpy.angle(DFc)*1j)
+    Ft = numpy.asarray(twin_data.f_true_max)
+    m = twin_data.calc_fom()
+    Fexp = twin_data.expected_F(Io, sigIo)
+    if 1:
+        fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
+                          2 * m * Ft * exp_ip - DFc,
+                          m * Ft * exp_ip)
+        delfwt = m * Ft * exp_ip - DFc
+    else: # based on "more accurate" evaluation of <m|F|>
+        fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
+                          2 * Fexp * exp_ip - DFc,
+                          m * Fexp * exp_ip)
+        delfwt = Fexp * exp_ip - DFc
+
+    sel = numpy.isnan(fwt)
+    fwt[sel] = DFc[sel]
+
+    hkldata2 = utils.hkl.HklData(hkldata.cell, hkldata.sg,
+                                 utils.hkl.df_from_twin_data(twin_data, fc_labs))
+    hkldata2.df["FWT"] = fwt
+    hkldata2.df["DELFWT"] = delfwt
+    hkldata2.df["FOM"] = m
+    hkldata2.df["F_est"] = Ft
+    hkldata2.df["F_exp"] = Fexp
+    hkldata2.df["FC"] = twin_data.f_calc.sum(axis=1)
+    hkldata2.df["DFC"] = DFc
+    hkldata2.df[D_labs] = Ds
+    hkldata2.df["S"] = twin_data.ml_sigma_array()
+    return hkldata2
+# calculate_maps_twin()
+
 def merge_models(sts): # simply merge models. no fix in chain ids etc.
-
-    del
-    model = gemmi.Model(
+    st2 = sts[0].clone()
+    del st2[:]
+    model = gemmi.Model(1)
     for st in sts:
         for m in st:
             for c in m:
                 model.add_chain(c)
-
-    return
+    st2.add_model(model)
+    return st2
 # merge_models()
 
-def decide_mtz_labels(mtz, find_free=True):
+def decide_mtz_labels(mtz, find_free=True, require=None):
+    # F is preferred for now by default
+    obs_types = ("F", "J", "G", "K")
+    if require:
+        assert set(require).issubset(obs_types)
+    else:
+        require = obs_types
     dlabs = utils.hkl.mtz_find_data_columns(mtz)
     logger.writeln("Finding possible options from MTZ:")
     for typ in dlabs:
         for labs in dlabs[typ]:
             logger.writeln(" --labin '{}'".format(",".join(labs)))
-
-
-
-
-    elif dlabs["G"]:
-        labin = dlabs["G"][0]
-    elif dlabs["K"]:
-        labin = dlabs["K"][0]
+    for typ in require:
+        if dlabs[typ]:
+            labin = dlabs[typ][0]
+            break
     else:
         raise RuntimeError("Data not found from mtz")
     if find_free:
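calculate_maps_twin above builds 2mFo-DFc style coefficients from the detwinned amplitude estimate: acentric reflections get 2*m*F - D*Fc, centric reflections get m*F, and reflections with no estimate fall back to DFc so the synthesis stays complete. A small standalone sketch of that selection logic; the array names here are illustrative and not the servalcat twin_data API:

```python
import numpy

def map_coefficients(F_est, fom, DFc, centric):
    """FWT/DELFWT-style coefficients; NaN observations are filled with DFc."""
    phase = numpy.exp(1j * numpy.angle(DFc))             # phases come from the model
    fwt = numpy.where(centric == 0,
                      2 * fom * F_est * phase - DFc,     # acentric: 2mFo - DFc
                      fom * F_est * phase)               # centric:  mFo
    delfwt = fom * F_est * phase - DFc                   # difference map: mFo - DFc
    fwt = numpy.where(numpy.isnan(fwt), DFc, fwt)        # unmeasured -> model term only
    return fwt, delfwt
```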
@@ -1019,7 +1163,8 @@ def decide_mtz_labels(mtz, find_free=True):
 # decide_mtz_labels()
 
 def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=None,
-                  n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False
+                  n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False,
+                  allow_unusual_occupancies=False, space_group=None):
     if labin: assert 1 < len(labin) < 6
     assert use in ("all", "work", "test")
     assert n_bins or n_per_bin #if n_bins not set, n_per_bin should be given
@@ -1042,6 +1187,9 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
         st, mtz = utils.fileio.read_small_molecule_files([hklin, xyzins[0]])
         sts = [st]
 
+    for st in sts:
+        utils.model.check_occupancies(st, raise_error=not allow_unusual_occupancies)
+
     if not labin:
         labin = decide_mtz_labels(mtz)
     col_types = {x.label:x.type for x in mtz.columns}
@@ -1067,6 +1215,12 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
     if hkldata.df.empty:
         raise RuntimeError("No data in hkl data")
 
+    if space_group is None:
+        sg_use = None
+    else:
+        sg_use = gemmi.SpaceGroup(space_group)
+        logger.writeln(f"Space group overridden by user. Using {sg_use.xhm()}")
+
     if sts:
         assert source in ["electron", "xray", "neutron"]
         for st in sts:
@@ -1079,39 +1233,43 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
         for st in sts: st.cell = hkldata.cell # mtz cell is used in any case
 
         sg_st = sts[0].find_spacegroup() # may be None
-        sg_use
-
-        if
-
-
-
-
-
-
-
-
+        if sg_use is None:
+            sg_use = hkldata.sg
+            if hkldata.sg != sg_st:
+                if st.cell.is_crystal() and sg_st and sg_st.laue_str() != hkldata.sg.laue_str():
+                    raise RuntimeError("Crystal symmetry mismatch between model and data")
+                logger.writeln("Warning: space group mismatch between model and mtz")
+                if sg_st and sg_st.laue_str() == hkldata.sg.laue_str():
+                    logger.writeln(" using space group from model")
+                    sg_use = sg_st
+                else:
+                    logger.writeln(" using space group from mtz")
+                logger.writeln("")
+
         for st in sts:
             st.spacegroup_hm = sg_use.xhm()
             st.setup_cell_images()
-        hkldata.sg = sg_use
 
         if not keep_charges:
             utils.model.remove_charge(sts)
         utils.model.check_atomsf(sts, source)
 
+    if sg_use is not None:
+        hkldata.sg = sg_use
     if newlabels[0] == "FP":
         hkldata.remove_nonpositive(newlabels[0])
         hkldata.remove_nonpositive(newlabels[1])
     hkldata.switch_to_asu()
     hkldata.remove_systematic_absences()
     #hkldata.df = hkldata.df.astype({name: 'float64' for name in ["I","SIGI","FP","SIGFP"] if name in hkldata.df})
-
+    d_min_data = hkldata.d_min_max(newlabels)[0]
+    if d_min is None and hkldata.d_min_max()[0] != d_min_data:
+        d_min = d_min_data
+        logger.writeln(f"Changing resolution to {d_min:.3f} A")
     if (d_min, d_max).count(None) != 2:
         hkldata = hkldata.copy(d_min=d_min, d_max=d_max)
         if hkldata.df.empty:
             raise RuntimeError("No data left in hkl data")
-        d_min, d_max = hkldata.d_min_max()
 
     hkldata.complete()
     hkldata.sort_by_resolution()
@@ -1136,13 +1294,7 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
         hkldata.setup_binning(n_bins=n_bins)
     logger.writeln("Data completeness: {:.2f}%".format(hkldata.completeness()*100.))
 
-    fc_labs = []
-    for i, st in enumerate(sts):
-        lab = "FC{}".format(i)
-        hkldata.df[lab] = utils.model.calc_fc_fft(st, d_min-1e-6,
-                                                  source=source, mott_bethe=(source=="electron"),
-                                                  miller_array=hkldata.miller_array())
-        fc_labs.append(lab)
+    fc_labs = ["FC{}".format(i) for i, _ in enumerate(sts)]
 
     # Create a centric selection table for faster look up
     centric_and_selections = {}
@@ -1195,10 +1347,34 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
     return hkldata, sts, fc_labs, centric_and_selections, free
 # process_input()
 
+def update_fc(st_list, fc_labs, d_min, monlib, source, mott_bethe, hkldata=None, twin_data=None):
+    #assert (hkldata, twin_data).count(None) == 1
+    # hkldata not updated when twin_data is given
+    for i, st in enumerate(st_list):
+        if st.ncs:
+            st = st.clone()
+            st.expand_ncs(gemmi.HowToNameCopiedChain.Dup, merge_dist=0)
+        if twin_data:
+            hkl = twin_data.asu
+        else:
+            hkl = hkldata.miller_array()
+        fc = utils.model.calc_fc_fft(st, d_min - 1e-6,
+                                     monlib=monlib,
+                                     source=source,
+                                     mott_bethe=mott_bethe,
+                                     miller_array=hkl)
+        if twin_data:
+            twin_data.f_calc[:,i] = fc
+        else:
+            hkldata.df[fc_labs[i]] = fc
+    if not twin_data:
+        hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
+# update_fc()
+
 def calc_Fmask(st, d_min, miller_array):
     logger.writeln("Calculating solvent contribution..")
     grid = gemmi.FloatGrid()
-    grid.setup_from(st, spacing=min(0.6, d_min / 2 - 1e-9))
+    grid.setup_from(st, spacing=min(0.6, (d_min-1e-6) / 2 - 1e-9))
     masker = gemmi.SolventMasker(gemmi.AtomicRadiiSet.Refmac)
     masker.put_mask_on_float_grid(grid, st[0])
     fmask_gr = gemmi.transform_map_to_f_phi(grid)
@@ -1206,29 +1382,44 @@ def calc_Fmask(st, d_min, miller_array):
     return Fmask
 # calc_Fmask()
 
-def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None, func_type="log_cosh"):
-
+def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None, func_type="log_cosh", twin_data=None):
+    # fc_labs must have solvent part at the end
+    miller_array = twin_data.asu if twin_data else hkldata.miller_array()
+    d_min = twin_data.d_min(sts[0].cell) if twin_data else hkldata.d_min_max()[0]
     if use_solvent:
         if mask is None:
-            Fmask = calc_Fmask(merge_models(sts),
+            Fmask = calc_Fmask(merge_models(sts), d_min, miller_array)
         else:
             fmask_gr = gemmi.transform_map_to_f_phi(mask)
-            Fmask = fmask_gr.get_value_by_hkl(
-
+            Fmask = fmask_gr.get_value_by_hkl(miller_array)
+        if twin_data:
+            fc_sum = twin_data.f_calc[:,:-1].sum(axis=1)
+        else:
+            fc_sum = hkldata.df[fc_labs[:-1]].sum(axis=1).to_numpy()
+        fc_list = [fc_sum, Fmask]
+    else:
+        if twin_data:
+            fc_list = [twin_data.f_calc.sum(axis=1)]
+        else:
+            fc_list = [hkldata.df[fc_labs].sum(axis=1).to_numpy()]
 
     scaling = LsqScale(func_type=func_type)
-    scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0)
+    scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0, twin_data=twin_data)
     scaling.scale()
     b_iso = scaling.b_iso
-    k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
     k_aniso = hkldata.debye_waller_factors(b_cart=scaling.b_aniso)
     hkldata.df["k_aniso"] = k_aniso # we need it later when calculating stats
 
     if use_solvent:
-
-
-
-
+        if twin_data:
+            s2 = numpy.asarray(twin_data.s2_array)
+        else:
+            s2 = 1. / hkldata.d_spacings().to_numpy()**2
+        Fbulk = Fmask * scaling.get_solvent_scale(scaling.k_sol, scaling.b_sol, s2)
+        if twin_data:
+            twin_data.f_calc[:,-1] = Fbulk
+        else:
+            hkldata.df[fc_labs[-1]] = Fbulk
 
     # Apply scales
     if use_int:
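The solvent contribution applied above follows the exponential bulk-solvent model already defined in LsqScale.get_solvent_scale: Fbulk = k_sol * exp(-B_sol * s^2 / 4) * Fmask, with s^2 = 1/d^2. A minimal sketch of that scaling, assuming Fmask holds the mask structure factors and d the resolution of each reflection (the helper name is hypothetical):

```python
import numpy

def bulk_solvent_contribution(Fmask, d, k_sol, b_sol):
    """Fbulk = k_sol * exp(-B_sol * s^2 / 4) * Fmask, with s^2 = 1/d^2."""
    s2 = 1.0 / d**2
    return k_sol * numpy.exp(-b_sol * s2 / 4) * Fmask
```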
@@ -1238,9 +1429,13 @@ def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int
     else:
         o_labs = ["FP", "SIGFP", "F(+)","SIGF(+)", "F(-)", "SIGF(-)"]
         hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall
-
-
-
+    if twin_data:
+        twin_data.f_calc[:] *= twin_data.debye_waller_factors(b_iso=b_iso)[:,None]
+    else:
+        k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
+        for lab in fc_labs: hkldata.df[lab] *= k_iso
+        # total Fc
+        hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
     return scaling
 # bulk_solvent_and_lsq_scales()
 
@@ -1353,10 +1548,21 @@ def main(args):
                n_per_bin=n_per_bin,
                use=args.use,
                max_bins=30,
-               keep_charges=args.keep_charges
+               keep_charges=args.keep_charges,
+               space_group=args.spacegroup)
     except RuntimeError as e:
         raise SystemExit("Error: {}".format(e))
 
+    if args.twin:
+        twin_data = find_twin_domains_from_data(hkldata)
+    else:
+        twin_data = None
+    if twin_data:
+        twin_data.setup_f_calc(len(sts) + (0 if args.no_solvent else 1))
+
+    update_fc(sts, fc_labs, d_min=hkldata.d_min_max()[0], monlib=None,
+              source=args.source, mott_bethe=(args.source=="electron"),
+              hkldata=hkldata, twin_data=twin_data)
     is_int = "I" in hkldata.df
 
     if args.mask:
@@ -1366,46 +1572,70 @@ def main(args):
 
     # Overall scaling & bulk solvent
     # FP/SIGFP will be scaled. Total FC will be added.
+    if not args.no_solvent:
+        fc_labs.append("Fbulk")
     lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
-                                      use_int=is_int, mask=mask)
+                                      use_int=is_int, mask=mask, twin_data=twin_data)
     b_aniso = lsq.b_aniso
     # stats
-    stats, overall = calc_r_and_cc(hkldata, centric_and_selections)
+    stats, overall = calc_r_and_cc(hkldata, centric_and_selections, twin_data)
     for lab in "R", "CC":
         logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
+    if is_int:
+        logger.writeln("R1 is calculated for reflections with I/sigma>2.")
+
+    if twin_data:
+        estimate_twin_fractions_from_model(twin_data, hkldata)
+        #del hkldata.df["FC"]
+        #del hkldata.df["Fbulk"]
+        # Need to redo scaling?
+        lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
+                                          use_int=is_int, mask=mask, twin_data=twin_data)
+        b_aniso = lsq.b_aniso
+        stats, overall = calc_r_and_cc(hkldata, centric_and_selections, twin_data)
+        for lab in "R", "CC":
+            logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
 
     # Estimate ML parameters
     D_labs = ["D{}".format(i) for i in range(len(fc_labs))]
 
     if args.use_cc:
         assert not is_int
+        assert not args.twin
         logger.writeln("Estimating sigma-A parameters from CC..")
         determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, args.use)
     else:
-        b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, centric_and_selections, args.D_trans, args.S_trans, args.use
-
-
-
+        b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, centric_and_selections, args.D_trans, args.S_trans, args.use,
+                                      twin_data=twin_data)
+    use = {"all": "all", "work": "work", "test": "work"}[args.use]
+    if twin_data:
+        # replace hkldata
+        hkldata = calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_and_selections, use)
+    elif is_int:
+        calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections, use)
     else:
         log_out = "{}.log".format(args.output_prefix)
-        calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out,
-                       use={"all": "all", "work": "work", "test": "work"}[args.use])
+        calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out, use)
 
     # Write mtz file
-    if
-        labs = ["
+    if twin_data:
+        labs = ["F_est", "F_exp"]
+    elif is_int:
+        labs = ["I", "SIGI"]
     else:
-        labs = ["FP", "SIGFP"
-    labs.extend(["FWT", "DELFWT", "FC", "DFC"])
+        labs = ["FP", "SIGFP"]
+    labs.extend(["FOM", "FWT", "DELFWT", "FC", "DFC"])
     if "FAN" in hkldata.df:
         labs.append("FAN")
     if not args.no_solvent:
         labs.append("Fbulk")
     if "FREE" in hkldata.df:
         labs.append("FREE")
+    if "F_true_est" in hkldata.df:
+        labs.append("F_true_est")
     labs += D_labs + ["S"]
     mtz_out = args.output_prefix+".mtz"
-    hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q"})
+    hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q", "F_est": "F", "F_exp": "F"})
     return hkldata
 # main()
 if __name__ == "__main__":