servalcat 0.4.60__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (44) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cp312-win_amd64.pyd +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +733 -0
  7. servalcat/refine/refine_geom.py +207 -0
  8. servalcat/refine/refine_spa.py +327 -0
  9. servalcat/refine/refine_xtal.py +242 -0
  10. servalcat/refine/spa.py +132 -0
  11. servalcat/refine/xtal.py +227 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +536 -0
  15. servalcat/refmac/refmac_wrapper.py +360 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +462 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +961 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1277 -0
  27. servalcat/utils/fileio.py +745 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +699 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +340 -0
  32. servalcat/utils/model.py +774 -0
  33. servalcat/utils/refmac.py +747 -0
  34. servalcat/utils/restraints.py +605 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +250 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1403 -0
  40. servalcat-0.4.60.dist-info/METADATA +56 -0
  41. servalcat-0.4.60.dist-info/RECORD +44 -0
  42. servalcat-0.4.60.dist-info/WHEEL +5 -0
  43. servalcat-0.4.60.dist-info/entry_points.txt +4 -0
  44. servalcat-0.4.60.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,1403 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import argparse
10
+ import gemmi
11
+ import numpy
12
+ import pandas
13
+ import itertools
14
+ import time
15
+ import scipy.special
16
+ import scipy.optimize
17
+ from servalcat.utils import logger
18
+ from servalcat import utils
19
+ from servalcat import ext
20
+
21
+ """
22
+ DFc = sum_j D_j F_c,j
23
+ The last Fc,n is bulk solvent contribution.
24
+ """
25
+
26
+ integr = ext.IntensityIntegrator()
27
+
28
def add_arguments(parser):
    """Register the sigmaa command-line options on *parser* (argparse)."""
    parser.description = 'Sigma-A parameter estimation for crystallographic data'
    parser.add_argument('--hklin', required=True,
                        help='Input MTZ file')
    parser.add_argument('--labin',
                        help='MTZ column for F,SIGF,FREE')
    parser.add_argument('--free', type=int,
                        help='flag number for test set')
    parser.add_argument('--model', required=True, nargs="+", action="append",
                        help='Input atomic model file(s)')
    parser.add_argument("-d", '--d_min', type=float)
    parser.add_argument('--d_max', type=float)
    parser.add_argument('--nbins', type=int,
                        help="Number of bins (default: auto)")
    parser.add_argument('-s', '--source', choices=["electron", "xray", "neutron"], required=True,
                        help="Scattering factor choice")
    parser.add_argument('--D_trans', choices=["exp", "splus"],
                        help="estimate D with positivity constraint")
    parser.add_argument('--S_trans', choices=["exp", "splus"],
                        help="estimate variance of unexplained signal with positivity constraint")
    parser.add_argument('--no_solvent', action='store_true',
                        help="Do not consider bulk solvent contribution")
    parser.add_argument('--use_cc', action='store_true',
                        help="Use CC(|F1|,|F2|) to CC(F1,F2) conversion to derive D and S")
    parser.add_argument('--use', choices=["all", "work", "test"], default="all",
                        help="Which reflections to be used for the parameter estimate.")
    parser.add_argument('--mask',
                        help="A solvent mask (by default calculated from the coordinates)")
    parser.add_argument('--keep_charges', action='store_true',
                        help="Use scattering factor for charged atoms. Use it with care.")
    parser.add_argument('-o','--output_prefix', default="sigmaa",
                        help='output file name prefix (default: %(default)s)')
# add_arguments()
61
+
62
def parse_args(arg_list):
    """Build a fresh parser with the sigmaa options and parse *arg_list*."""
    p = argparse.ArgumentParser()
    add_arguments(p)
    parsed = p.parse_args(arg_list)
    return parsed
# parse_args()
67
+
68
def nanaverage(cc, w):
    """Weighted average of *cc* ignoring NaN entries.

    Returns NaN when the weights of the non-NaN entries sum to zero
    (including the case where every value is NaN).
    """
    keep = ~numpy.isnan(cc)
    weights = w[keep]
    if numpy.sum(weights) == 0:
        return numpy.nan
    return numpy.average(cc[keep], weights=weights)
73
+
74
def calc_r_and_cc(hkldata, centric_and_selections):
    """Compute per-bin and overall R factors and correlations of obs vs calc.

    Works on intensities (stat columns R2/CCI) when an "I" column is present
    in hkldata.df, otherwise on amplitudes (columns R/CCF).  When a "FREE"
    column exists, statistics are reported separately for the work and free
    sets.  Returns (per-bin stats DataFrame, dict of overall values).
    """
    has_int = "I" in hkldata.df
    has_free = "FREE" in hkldata.df
    stats = hkldata.binned_df.copy()
    stats["n_obs"] = 0
    if has_free:
        stats[["n_work", "n_free"]] = 0
    rlab = "R2" if has_int else "R"
    cclab = "CCI" if has_int else "CCF"
    # model amplitudes on the (anisotropically scaled) observation scale
    Fc = numpy.abs(hkldata.df.FC * hkldata.df.k_aniso)
    if has_int:
        obs = hkldata.df.I
        calc = Fc**2
    else:
        obs = hkldata.df.FP
        calc = Fc
    if has_free:
        for lab in (cclab, rlab):
            for suf in ("work", "free"):
                stats[lab+suf] = numpy.nan
    else:
        stats[cclab] = numpy.nan
        stats[rlab] = numpy.nan

    for i_bin, idxes in hkldata.binned():
        stats.loc[i_bin, "n_obs"] = numpy.sum(numpy.isfinite(obs[idxes]))
        if has_free:
            # index 1 of each selection = work set, index 2 = free set
            for j, suf in ((1, "work"), (2, "free")):
                idxes2 = numpy.concatenate([sel[j] for sel in centric_and_selections[i_bin]])
                stats.loc[i_bin, "n_"+suf] = numpy.sum(numpy.isfinite(obs[idxes2]))
                stats.loc[i_bin, cclab+suf] = utils.hkl.correlation(obs[idxes2], calc[idxes2])
                stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(obs[idxes2], calc[idxes2])
        else:
            stats.loc[i_bin, cclab] = utils.hkl.correlation(obs[idxes], calc[idxes])
            stats.loc[i_bin, rlab] = utils.hkl.r_factor(obs[idxes], calc[idxes])

    # Overall
    ret = {}
    if has_free:
        # overall CC: n-weighted average of per-bin CCs; overall R: from all reflections
        for suf in ("work", "free"):
            ret[cclab+suf+"avg"] = nanaverage(stats[cclab+suf], stats["n_"+suf])
        for j, suf in ((1, "work"), (2, "free")):
            idxes = numpy.concatenate([sel[j] for i_bin, _ in hkldata.binned() for sel in centric_and_selections[i_bin]])
            ret[rlab+suf] = utils.hkl.r_factor(obs[idxes], calc[idxes])
    else:
        ret[cclab+"avg"] = nanaverage(stats[cclab], stats["n_obs"])
        ret[rlab] = utils.hkl.r_factor(obs, calc)

    return stats, ret
# calc_r_and_cc()
124
+
125
class VarTrans:
    """Variable transformations keeping D and S positive during optimization.

    For each of D and S this exposes a triple of callables: the forward
    transform f(x), its derivative df/dx, and the inverse f^-1.  Supported
    choices are "exp", "splus" (softplus) and None (identity).
    """
    def __init__(self, D_trans, S_trans):
        # softplus generally behaves better than exp, which can let the
        # parameter run away to very large values.
        identity = (lambda x: x,
                    lambda x: 1,
                    lambda x: x)
        softplus = (lambda x: numpy.logaddexp(0, x),
                    scipy.special.expit,  # derivative: 1 / (1 + exp(-x))
                    lambda x: x + numpy.log(-numpy.expm1(-x)))  # inverse softplus
        exponential = (numpy.exp, numpy.exp, numpy.log)
        table = {"exp": exponential, "splus": softplus, None: identity}

        self.D, self.D_deriv, self.D_inv = table[D_trans]
        self.S, self.S_deriv, self.S_inv = table[S_trans]
# class VarTrans
142
+
143
class LsqScale:
    """Least-squares scaling of calculated to observed structure factors.

    Refines an overall scale, anisotropic B tensor and (optionally) a
    two-parameter bulk-solvent model (k_sol, B_sol) by minimizing either a
    squared-difference or log-cosh residual between |Fo| (or sqrt(Io)) and
    the scaled |Fc|.
    """
    # parameter x = [k_overall, adp_pars, k_sol, B_sol]
    def __init__(self, k_as_exp=False, func_type="log_cosh"):
        # k_as_exp: refine ln(k) instead of k itself (guarantees positivity)
        assert func_type in ("sq", "log_cosh")
        self.k_trans = lambda x: numpy.exp(x) if k_as_exp else x
        self.k_trans_der = lambda x: numpy.exp(x) if k_as_exp else 1
        self.k_trans_inv = lambda x: numpy.log(x) if k_as_exp else x
        self.func_type = func_type
        self.reset()

    def reset(self):
        # Reset refined parameters to their defaults.
        self.k_sol = 0.35 # same default as gemmi/scaling.hpp # refmac seems to use 0.33 and 100? SCALE_LS_PART
        self.b_sol = 46.
        self.k_overall = None
        self.b_iso = None
        self.b_aniso = None

    def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None):
        # fc_list: [Fc_atoms] or [Fc_atoms, Fmask]; presence of the second
        # element turns on the bulk-solvent parameters everywhere below.
        assert 0 < len(fc_list) < 3
        self.use_int = use_int
        if sigma_cutoff is not None:
            # optional sigma cutoff on the observations used for scaling
            if use_int:
                sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
                self.labcut = "(I/SIGI>{})".format(sigma_cutoff)
            else:
                sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
                self.labcut = "(F/SIGF>{})".format(sigma_cutoff)
        else:
            sel = hkldata.df.index
            self.labcut = ""
        self.obs = hkldata.df["I" if use_int else "FP"].to_numpy()[sel]
        self.calc = [x[sel] for x in fc_list]
        self.s2mat = hkldata.ssq_mat()[:,sel]
        self.s2 = 1. / hkldata.d_spacings().to_numpy()[sel]**2
        # symmetry constraints on the anisotropic B parameters
        self.adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=False)
        if use_int:
            self.sqrt_obs = numpy.sqrt(self.obs)

    def get_solvent_scale(self, k_sol, b_sol, s2=None):
        # bulk-solvent scale factor: k_sol * exp(-B_sol * s^2 / 4)
        if s2 is None: s2 = self.s2
        return k_sol * numpy.exp(-b_sol * s2 / 4)

    def scaled_fc(self, x):
        # Total model: k * exp(-B s^2-term) * (Fc + k_sol exp(-B_sol s^2/4) Fmask)
        fc0 = self.calc[0]
        if len(self.calc) == 2:
            fmask = self.calc[1]
            fbulk = self.get_solvent_scale(x[-2], x[-1]) * fmask
            fc = fc0 + fbulk
        else:
            fc = fc0
        nadp = self.adpdirs.shape[0]
        B = numpy.dot(x[1:nadp+1], self.adpdirs)
        kani = numpy.exp(numpy.dot(-B, self.s2mat))
        return self.k_trans(x[0]) * kani * fc

    def target(self, x):
        # Residual between observations and scaled model amplitudes;
        # for intensities the comparison is done on sqrt(Io) vs |Fc|.
        y = numpy.abs(self.scaled_fc(x))
        if self.use_int:
            diff = self.sqrt_obs - y
            #y2 = y**2
            #diff = self.obs - y2
        else:
            diff = self.obs - y

        if self.func_type == "sq":
            return numpy.nansum(diff**2)
        elif self.func_type == "log_cosh":
            # log-cosh: quadratic near zero, linear for large residuals (robust)
            return numpy.nansum(gemmi.log_cosh(diff))
        else:
            raise RuntimeError("bad func_type")

    def grad(self, x):
        # Analytical gradient of target() with respect to x.
        g = numpy.zeros_like(x)
        fc0 = self.calc[0]
        if len(self.calc) == 2:
            fmask = self.calc[1]
            temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
            fbulk = x[-2] * temp_sol * fmask
            fc = fc0 + fbulk
        else:
            fc = fc0
        nadp = self.adpdirs.shape[0]
        B = numpy.dot(x[1:nadp+1], self.adpdirs)
        kani = numpy.exp(numpy.dot(-B, self.s2mat))
        fc_abs = numpy.abs(fc)
        k = self.k_trans(x[0])
        y = k * kani * fc_abs
        if self.use_int:
            diff = self.sqrt_obs - y
            diff_der = -1
            #diff = self.obs - y**2
            #diff_der = -2 * y
        else:
            diff = self.obs - y
            diff_der = -1
        if self.func_type == "sq":
            dfdy = 2 * diff * diff_der
        elif self.func_type == "log_cosh":
            dfdy = numpy.tanh(diff) * diff_der
        else:
            raise RuntimeError("bad func_type")

        # chain rule through the anisotropic scale, overall scale and solvent
        dfdb = numpy.nansum(-self.s2mat * k * fc_abs * kani * dfdy, axis=1)
        g[0] = numpy.nansum(kani * fc_abs * dfdy * self.k_trans_der(x[0]))
        g[1:nadp+1] = numpy.dot(dfdb, self.adpdirs.T)
        if len(self.calc) == 2:
            # d|Fc|/dk_sol involves Re(Fmask * conj(Fc)) / |Fc|
            re_fmask_fcconj = (fmask * fc.conj()).real
            tmp = k * kani * temp_sol / fc_abs * re_fmask_fcconj
            g[-2] = numpy.nansum(tmp * dfdy)
            g[-1] = numpy.nansum(-tmp * dfdy * x[-2] * self.s2 / 4)

        return g

    def calc_shift(self, x):
        # Gauss-Newton style shift: builds per-reflection gradient rows and an
        # approximate Hessian, then solves with a pseudo-inverse.
        # TODO: sort out code duplication, if we use this.
        g = numpy.zeros((len(self.calc[0]), len(x)))
        H = numpy.zeros((len(x), len(x)))

        fc0 = self.calc[0]
        if len(self.calc) == 2:
            fmask = self.calc[1]
            temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
            fbulk = x[-2] * temp_sol * fmask
            fc = fc0 + fbulk
        else:
            fc = fc0
        nadp = self.adpdirs.shape[0]
        B = numpy.dot(x[1:nadp+1], self.adpdirs)
        kani = numpy.exp(numpy.dot(-B, self.s2mat))
        fc_abs = numpy.abs(fc)
        k = self.k_trans(x[0])
        y = k * kani * fc_abs
        if self.use_int:
            diff = self.sqrt_obs - y
            diff_der = -1
            diff_der2 = 0
        else:
            diff = self.obs - y
            diff_der = -1.
            diff_der2 = 0.

        if self.func_type == "sq":
            dfdy = 2 * diff * diff_der
            dfdy2 = 2 * diff_der**2 + 2 * diff * diff_der2
        elif self.func_type == "log_cosh":
            dfdy = numpy.tanh(diff) * diff_der
            #dfdy2 = 1 /numpy.cosh(diff)**2 * diff_der**2 + numpy.tanh(diff) * diff_der2 # problematic with large diff
            #dfdy2 = numpy.where(diff==0, 1., numpy.abs(numpy.tanh(diff)) / gemmi.log_cosh(diff)) * diff_der**2 + numpy.tanh(diff) * diff_der2
            # tanh(d)/d is a numerically stable stand-in for sech^2(d) here
            dfdy2 = numpy.where(diff==0, 1., numpy.tanh(diff) / diff) * diff_der**2 + numpy.tanh(diff) * diff_der2
        else:
            raise RuntimeError("bad func_type")

        dfdb = -self.s2mat * k * fc_abs * kani
        g[:,0] = kani * fc_abs * self.k_trans_der(x[0])
        g[:,1:nadp+1] = numpy.dot(dfdb.T, self.adpdirs.T)
        if len(self.calc) == 2:
            re_fmask_fcconj = (fmask * fc.conj()).real
            tmp = k * kani * temp_sol / fc_abs * re_fmask_fcconj
            g[:,-2] = tmp
            g[:,-1] = -tmp * x[-2] * self.s2 / 4

        # XXX won't work with NaNs.
        H = numpy.dot(g.T, g * dfdy2[:,None])
        g = numpy.sum(dfdy[:,None] * g, axis=0)
        dx = -numpy.dot(g, numpy.linalg.pinv(H))
        return dx

    def initial_kb(self):
        # Linear (log-space) least-squares estimate of overall k and B from
        # log(|Fc|) - log(|Fo|); used as the optimizer starting point.
        fc0 = self.calc[0]
        if len(self.calc) == 2:
            fmask = self.calc[1]
            fbulk = self.get_solvent_scale(self.k_sol, self.b_sol) * fmask
            fc = fc0 + fbulk
        else:
            fc = fc0
        sel = self.obs > 0  # log() below requires positive observations
        f1p, f2p, s2p = self.obs[sel], numpy.abs(fc)[sel], self.s2[sel]
        if self.use_int: f2p *= f2p
        tmp = numpy.log(f2p) - numpy.log(f1p)
        # g = [dT/dk, dT/db]
        g = numpy.array([2 * numpy.sum(tmp), -numpy.sum(tmp*s2p)/2])
        H = numpy.zeros((2,2))
        H[0,0] = 2*len(f1p)
        H[1,1] = numpy.sum(s2p**2/8)
        H[0,1] = H[1,0] = -numpy.sum(s2p)/2
        x = -numpy.dot(numpy.linalg.inv(H), g)
        if self.use_int: x /= 2
        k = numpy.exp(x[0])
        b = x[1]
        logger.writeln(" initial k,b = {:.2e} {:.2e}".format(k, b))
        logger.writeln(" R{} = {:.4f}".format(self.labcut, utils.hkl.r_factor(f1p, f2p * k * numpy.exp(-b*self.s2[sel]/4))))
        return k, b

    def scale(self):
        """Run the scaling optimization and store results on the instance."""
        use_sol = len(self.calc) == 2
        msg = "Scaling Fc to {} {} bulk solvent contribution".format("Io" if self.use_int else "Fo",
                                                                     "with" if use_sol else "without")
        logger.writeln(msg)
        if self.k_overall is None or self.b_iso is None:
            k, b = self.initial_kb()
        else:
            k, b = self.k_overall, self.b_iso
        if self.b_aniso is None:
            self.b_aniso = gemmi.SMat33d(b,b,b,0,0,0)
        x0 = [self.k_trans_inv(k)]
        bounds = [(0, None)]
        x0.extend(numpy.dot(self.b_aniso.elements_pdb(), self.adpdirs.T))
        bounds.extend([(None, None)]*(len(x0)-1))
        if use_sol:
            x0.extend([self.k_sol, self.b_sol])
            bounds.extend([(1e-4, None), (10., 400.)])
        if 0:
            # numerical-vs-analytical gradient check (debug only)
            f0 = self.target(x0)
            ader = self.grad(x0)
            e = 1e-4
            nder = []
            for i in range(len(x0)):
                x = numpy.copy(x0)
                x[i] += e
                f1 = self.target(x)
                nder.append((f1 - f0) / e)
            print("ADER NDER RATIO")
            print(ader)
            print(nder)
            print(ader / nder)
            quit()

        t0 = time.time()
        if 1:
            # hand-rolled Gauss-Newton iterations with step halving
            x = x0
            for i in range(40):
                x_ini = x.copy()
                f0 = self.target(x)
                dx = self.calc_shift(x)
                if numpy.max(numpy.abs(dx)) < 1e-6:
                    break
                for s in (1, 0.5, 0.25):
                    if 0:
                        with open("debug.dat", "w") as ofs:
                            for s in numpy.linspace(-2, 2, 100):
                                f1 = self.target(x+dx * s)
                                #print(dx, f0, f1, f0 - f1)
                                ofs.write("{:4e} {:4e}\n".format(s, f1))
                    shift = dx * s
                    x = x_ini + shift
                    # clamp parameters back into their allowed ranges
                    if x[0] < 0: x[0] = x0[0]
                    if use_sol:
                        if x[-1] < 10: x[-1] = 10
                        elif x[-1] > 400: x[-1] = 400
                        if x[-2] < 1e-4: x[-2] = 1e-4
                    f1 = self.target(x)
                    if f1 < f0: break
                #logger.writeln("cycle {} {} {} {} {} {}".format(i, f0, f1, s, shift, (f0 - f1) / f0))
                if 0 < (f0 - f1) / f0 < 1e-6:
                    break
            res_x = x
        else:
            # alternative: scipy L-BFGS-B path (currently disabled)
            res = scipy.optimize.minimize(fun=self.target, x0=x0, jac=self.grad, bounds=bounds)
            #logger.writeln(str(res))
            logger.writeln(" finished in {} iterations ({} evaluations)".format(res.nit, res.nfev))
            res_x = res.x
        logger.writeln(" time: {:.3f} sec".format(time.time() - t0))
        self.k_overall = self.k_trans(res_x[0])
        nadp = self.adpdirs.shape[0]
        b_overall = gemmi.SMat33d(*numpy.dot(res_x[1:nadp+1], self.adpdirs))
        self.b_iso = b_overall.trace() / 3
        self.b_aniso = b_overall.added_kI(-self.b_iso) # subtract isotropic contribution

        logger.writeln(" k_ov= {:.2e} B_iso= {:.2e} B_aniso= {}".format(self.k_overall, self.b_iso, self.b_aniso))
        if use_sol:
            self.k_sol = res_x[-2]
            self.b_sol = res_x[-1]
            logger.writeln(" k_sol= {:.2e} B_sol= {:.2e}".format(self.k_sol, self.b_sol))
        calc = numpy.abs(self.scaled_fc(res_x))
        if self.use_int: calc *= calc
        logger.writeln(" CC{} = {:.4f}".format(self.labcut, utils.hkl.correlation(self.obs, calc)))
        logger.writeln(" R{} = {:.4f}".format(self.labcut, utils.hkl.r_factor(self.obs, calc)))
# class LsqScale
421
+
422
def calc_abs_DFc(Ds, Fcs):
    """Return |sum_j D_j * Fc_j| for paired scale factors and Fc arrays.

    Ds: sequence of scale factors, one per Fc contribution.
    Fcs: sequence of (complex) structure-factor arrays of equal length.
    """
    # zip pairs each D with its Fc contribution; clearer than indexing
    # both sequences with range(len(Ds)).
    DFc = sum(D * Fc for D, Fc in zip(Ds, Fcs))
    return numpy.abs(DFc)
# calc_abs_DFc()
426
+
427
+ #import line_profiler
428
+ #profile = line_profiler.LineProfiler()
429
+ #import atexit
430
+ #atexit.register(profile.print_stats)
431
+ #@profile
432
def mlf(df, fc_labs, Ds, S, k_ani, idxes):
    """Minus log-likelihood for amplitudes (MLF), summed over *idxes*.

    df: reflection table with FP, SIGFP, epsilon, centric and Fc columns.
    fc_labs: column names of the Fc contributions.
    Ds: D parameters, one per Fc contribution.
    S: variance of the unexplained signal.
    k_ani: per-reflection anisotropic scale.
    idxes: indices of the reflections to include.
    """
    Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    DFc = (Ds * Fcs).sum(axis=1)
    # One reduction is enough; the original applied nansum a second time to
    # what was already a scalar.
    return numpy.nansum(ext.ll_amp(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes],
                                   k_ani[idxes], S * df.epsilon.to_numpy()[idxes],
                                   numpy.abs(DFc), df.centric.to_numpy()[idxes]+1))
# mlf()
440
+
441
+ #@profile
442
def deriv_mlf_wrt_D_S(df, fc_labs, Ds, S, k_ani, idxes):
    """Gradient of the MLF target with respect to (D_0..D_{n-1}, S)."""
    n_D = len(fc_labs)
    Fc_mat = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    per_refl = ext.ll_amp_der1_DS(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes], k_ani[idxes], S,
                                  Fc_mat, Ds,
                                  df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])
    grad = numpy.zeros(n_D + 1)
    grad[:n_D] = numpy.nansum(per_refl[:, :n_D], axis=0)  # d/dD_j
    grad[-1] = numpy.nansum(per_refl[:, -1])              # d/dS
    return grad
# deriv_mlf_wrt_D_S()
452
+
453
+ #@profile
454
def mlf_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
    """Scoring-step shift for S under the amplitude (MLF) likelihood."""
    Fc_mat = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    per_refl = ext.ll_amp_der1_DS(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes], k_ani[idxes], S,
                                  Fc_mat, Ds,
                                  df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])
    dS = per_refl[:, -1]
    grad = numpy.nansum(dS)
    # sum of squared first derivatives approximates the expected second derivative
    hess = numpy.nansum(dS**2)
    return -grad / hess
# mlf_shift_S()
463
+
464
def mli(df, fc_labs, Ds, S, k_ani, idxes):
    """Minus log-likelihood for intensities (MLI), summed over *idxes*."""
    Fc_mat = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    abs_DFc = numpy.abs((Ds * Fc_mat).sum(axis=1))
    per_refl = integr.ll_int(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes],
                             k_ani[idxes], S * df.epsilon.to_numpy()[idxes],
                             abs_DFc, df.centric.to_numpy()[idxes]+1)
    return numpy.nansum(per_refl)
# mli()
472
+
473
def deriv_mli_wrt_D_S(df, fc_labs, Ds, S, k_ani, idxes):
    """Gradient of the MLI target with respect to (D_0..D_{n-1}, S)."""
    n_D = len(fc_labs)
    Fc_mat = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    per_refl = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
                                     Fc_mat, Ds,
                                     df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])
    grad = numpy.zeros(n_D + 1)
    grad[:n_D] = numpy.nansum(per_refl[:, :n_D], axis=0)  # d/dD_j
    grad[-1] = numpy.nansum(per_refl[:, -1])              # d/dS
    return grad
# deriv_mli_wrt_D_S()
483
+
484
def mli_shift_D(df, fc_labs, Ds, S, k_ani, idxes):
    """Return a quasi-Newton shift for the D parameters under the MLI target.

    Uses per-reflection first derivatives from integr.ll_int_der1_DS and
    approximates the Hessian by the summed outer products of the gradient
    rows; the returned shift is -g . pinv(H).
    """
    Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    r = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
                              Fcs, Ds,
                              df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])[:,:len(fc_labs)]
    g = numpy.nansum(r, axis=0)# * trans.D_deriv(x[:len(fc_labs)]) # D
    #tmp = numpy.hstack([r[:,:len(fc_labs)] #* trans.D_deriv(x[:len(fc_labs)]),
    #        r[:,-1,None] * trans.S_deriv(x[-1])])
    # outer products of per-reflection gradients approximate the Hessian
    H = numpy.nansum(numpy.matmul(r[:,:,None], r[:,None]), axis=0)
    return -numpy.dot(g, numpy.linalg.pinv(H))
# mli_shift_D()
495
+
496
def mli_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
    """Scoring-step shift for S under the intensity (MLI) likelihood."""
    Fc_mat = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
    per_refl = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
                                     Fc_mat, Ds,
                                     df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])
    dS = per_refl[:, -1]
    grad = numpy.nansum(dS)
    # sum of squared first derivatives approximates the expected second derivative
    hess = numpy.nansum(dS**2)
    return -grad / hess
# mli_shift_S()
505
+
506
def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, use="all", smoothing="gauss"):
    """Estimate per-bin D and S parameters from amplitude correlations.

    Converts CC(|Fo|,|Fc|) to an estimated CC(Fo,Fc) via the theoretical
    acentric/centric relations, solves a linear system for the D_j, then
    derives S from the residual variance.  Results are written into
    hkldata.binned_df and smoothed by smooth_params().
    """
    # theoretical CC(|F1|,|F2|) as a function of CC(F1,F2), acentric/centric
    cc_a = lambda cc: (numpy.pi/4*(1-cc**2)**2 * scipy.special.hyp2f1(3/2, 3/2, 1, cc**2) - numpy.pi/4) / (1-numpy.pi/4)
    cc_c = lambda cc: 2/(numpy.pi-2) * (cc**2*numpy.sqrt(1-cc**2) + cc * numpy.arctan(cc/numpy.sqrt(1-cc**2)) + (1-cc**2)**(3/2)-1)
    table_fsc = numpy.arange(0, 1, 1e-3)
    table_cc = [cc_a(table_fsc), cc_c(table_fsc)]

    for lab in D_labs: hkldata.binned_df[lab] = 1.
    hkldata.binned_df["S"] = 1.

    stats = hkldata.binned_df[["d_max", "d_min"]].copy()
    for labi in fc_labs:
        stats["CC(FP,{})".format(labi)] = numpy.nan
    for i, labi in enumerate(fc_labs):
        for j in range(i+1, len(fc_labs)):
            labj = fc_labs[j]
            stats["CC({},{})".format(labi, labj)] = numpy.nan

    # sqrt of eps * c; c = 1 for acentrics and 2 for centrics
    inv_sqrt_c_eps = 1. / numpy.sqrt(hkldata.df.epsilon.to_numpy() * (hkldata.df.centric.to_numpy() + 1))
    for i_bin, _ in hkldata.binned():
        # assume they are all acentrics.. only correct by c
        if use == "all":
            cidxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
        else:
            i = 1 if use == "work" else 2
            cidxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
        valid_sel = numpy.isfinite(hkldata.df.FP.to_numpy()[cidxes])
        cidxes = cidxes[valid_sel]
        factor = inv_sqrt_c_eps[cidxes]
        k_ani = hkldata.df.k_aniso.to_numpy()[cidxes]
        Fo = hkldata.df.FP.to_numpy()[cidxes] * factor / k_ani
        mean_Fo2 = numpy.mean(Fo**2)
        SigFo = hkldata.df.SIGFP.to_numpy()[cidxes] / k_ani
        Fcs = [hkldata.df[lab].to_numpy()[cidxes] * factor for lab in fc_labs]
        mean_Fk2 = numpy.array([numpy.mean(numpy.abs(fk)**2) for fk in Fcs])

        # estimate D
        cc_fo_fj = [numpy.corrcoef(numpy.abs(fj), Fo)[1,0] for fj in Fcs]
        for i in range(len(fc_labs)): stats.loc[i_bin, "CC(FP,{})".format(fc_labs[i])] = cc_fo_fj[i]
        # NOTE(review): two earlier list-comprehension versions of A (one also
        # named "mat") were computed and immediately discarded; removed as dead code.
        A = numpy.array([[numpy.real(numpy.corrcoef(fk, fj)[1,0]) for fk in Fcs] for fj in Fcs])
        # convert observed amplitude CCs to estimated complex-F correlations
        v = numpy.interp(cc_fo_fj, table_cc[0], table_fsc)

        for i in range(len(fc_labs)):
            labi = fc_labs[i]
            for j in range(i+1, len(fc_labs)):
                labj = fc_labs[j]
                stats.loc[i_bin, "CC({},{})".format(labi, labj)] = numpy.real(numpy.corrcoef(Fcs[i], Fcs[j])[1,0])

        # test all signs, fixing first Fc positive.
        # NOTE(review): if every cc_test is NaN, DFc/Dj stay unbound and the
        # code below would raise - presumably the bins always have enough data.
        cc_max = -2
        for v_test in itertools.product(*((x, -x) for x in v[1:])):
            v_test = numpy.array((v[0],)+v_test)
            Dj_test = numpy.dot(numpy.linalg.pinv(A), v_test) * numpy.sqrt(mean_Fo2 / mean_Fk2)
            DFc_test = calc_abs_DFc(Dj_test, Fcs)
            cc_test = numpy.corrcoef(Fo, numpy.abs(DFc_test))[1,0]
            if cc_test > cc_max:
                cc_max = cc_test
                DFc = DFc_test
                Dj = Dj_test

        for lab, D in zip(D_labs, Dj):
            hkldata.binned_df.loc[i_bin, lab] = D

        # estimate S
        mean_DFc2 = numpy.mean(DFc**2)
        est_fsc_fo_fc = numpy.interp(numpy.corrcoef(Fo, DFc)[1,0], table_cc[0], table_fsc)
        S = mean_Fo2 - 2 * numpy.sqrt(mean_Fo2 * mean_DFc2) * est_fsc_fo_fc + mean_DFc2 - numpy.mean(SigFo**2)
        hkldata.binned_df.loc[i_bin, "S"] = S

    logger.writeln("\nCC:")
    logger.writeln(stats.to_string())
    logger.writeln("\nEstimates:")
    logger.writeln(hkldata.binned_df.to_string())
    smooth_params(hkldata, D_labs, smoothing)
# determine_mlf_params_from_cc()
589
+
590
def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use):
    """Write initial per-bin D and S estimates into hkldata.binned_df.

    D for the first Fc contribution is seeded from the Io/Ic correlation;
    the remaining D columns are left at 1.  S is seeded from the residual
    second moments.  Non-positive D seeds are replaced by a fraction of the
    smallest positive one.
    """
    # Initial values
    for lab in D_labs: hkldata.binned_df[lab] = 1.
    hkldata.binned_df["S"] = 10000.
    k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
    lab_obs = "I" if use_int else "FP"
    for i_bin, _ in hkldata.binned():
        if use == "all":
            idxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
        else:
            i = 1 if use == "work" else 2
            idxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
        valid_sel = numpy.isfinite(hkldata.df.loc[idxes, lab_obs]) # as there is no nan-safe numpy.corrcoef
        if numpy.sum(valid_sel) < 2:
            continue
        idxes = idxes[valid_sel]
        # work on the intensity scale in both modes
        if use_int:
            Io = hkldata.df.I.to_numpy()[idxes]
        else:
            Io = hkldata.df.FP.to_numpy()[idxes]**2
        Io /= k_ani[idxes]**2
        Ic = numpy.abs(hkldata.df.FC.to_numpy()[idxes])**2
        mean_Io = numpy.mean(Io)
        mean_Ic = numpy.mean(Ic)
        cc = numpy.corrcoef(Io, Ic)[1,0]
        if cc > 0 and mean_Io > 0:
            D = numpy.sqrt(mean_Io / mean_Ic * cc)
        else:
            D = 0 # will be taken care later
        hkldata.binned_df.loc[i_bin, D_labs[0]] = D
        if mean_Io > 0:
            S = mean_Io - 2 * numpy.sqrt(mean_Io * mean_Ic * numpy.maximum(0, cc)) + mean_Ic
        else:
            S = numpy.std(Io) # similar initial to french_wilson
        hkldata.binned_df.loc[i_bin, "S"] = S

    for D_lab in D_labs:
        if hkldata.binned_df[D_lab].min() <= 0:
            min_D = hkldata.binned_df[D_lab][hkldata.binned_df[D_lab] > 0].min() * 0.1
            logger.writeln("WARNING: negative {} is detected from initial estimates. Replacing it using minimum positive value {:.2e}".format(D_lab, min_D))
            # Series.where(..., inplace=True) is deprecated (pandas >= 2.1) and,
            # being applied to a column selection, may fail to write back;
            # plain assignment is the supported equivalent.
            hkldata.binned_df[D_lab] = hkldata.binned_df[D_lab].where(hkldata.binned_df[D_lab] > 0, min_D) # arbitrary

    logger.writeln("Initial estimates:")
    logger.writeln(hkldata.binned_df.to_string())
# initialize_ml_params()
635
+
636
+ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_selections,
637
+ D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss"):
638
+ assert use in ("all", "work", "test")
639
+ assert smoothing in (None, "gauss")
640
+ logger.writeln("Estimating sigma-A parameters using {}..".format("intensities" if use_int else "amplitudes"))
641
+ trans = VarTrans(D_trans, S_trans)
642
+ lab_obs = "I" if use_int else "FP"
643
+ def get_idxes(i_bin):
644
+ if use == "all":
645
+ return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
646
+ else:
647
+ i = 1 if use == "work" else 2
648
+ return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
649
+
650
+ if not set(D_labs + ["S"]).issubset(hkldata.binned_df):
651
+ initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use)
652
+ for dlab, fclab in zip(D_labs, fc_labs):
653
+ hkldata.binned_df["Mn(|{}*{}|)".format(dlab, fclab)] = numpy.nan
654
+
655
+ refpar = "all"
656
+ for i_cyc in range(n_cycle):
657
+ t0 = time.time()
658
+ nfev_total = 0
659
+ k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
660
+ for i_bin, _ in hkldata.binned():
661
+ idxes = get_idxes(i_bin)
662
+ valid_sel = numpy.isfinite(hkldata.df.loc[idxes, lab_obs]) # as there is no nan-safe numpy.corrcoef
663
+ if numpy.sum(valid_sel) < 5:
664
+ logger.writeln("WARNING: bin {} has no sufficient reflections".format(i_bin))
665
+ continue
666
+
667
+ def target(x):
668
+ if refpar == "all":
669
+ Ds = trans.D(x[:len(fc_labs)])
670
+ S = trans.S(x[-1])
671
+ elif refpar == "D":
672
+ Ds = trans.D(x[:len(fc_labs)])
673
+ S = hkldata.binned_df.loc[i_bin, "S"]
674
+ else:
675
+ Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
676
+ S = trans.S(x[-1])
677
+ f = mli if use_int else mlf
678
+ return f(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
679
+
680
+ def grad(x):
681
+ if refpar == "all":
682
+ Ds = trans.D(x[:len(fc_labs)])
683
+ S = trans.S(x[-1])
684
+ n_par = len(fc_labs)+1
685
+ elif refpar == "D":
686
+ Ds = trans.D(x[:len(fc_labs)])
687
+ S = hkldata.binned_df.loc[i_bin, "S"]
688
+ n_par = len(fc_labs)
689
+ else:
690
+ Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
691
+ S = trans.S(x[-1])
692
+ n_par = 1
693
+ calc_deriv = deriv_mli_wrt_D_S if use_int else deriv_mlf_wrt_D_S
694
+ r = calc_deriv(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
695
+ g = numpy.zeros(n_par)
696
+ if refpar in ("all", "D"):
697
+ g[:len(fc_labs)] = r[:len(fc_labs)]
698
+ g[:len(fc_labs)] *= trans.D_deriv(x[:len(fc_labs)])
699
+ if refpar in ("all", "S"):
700
+ g[-1] = r[-1]
701
+ g[-1] *= trans.S_deriv(x[-1])
702
+ return g
703
+
704
+ if 0:
705
+ refpar = "S"
706
+ x0 = trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])
707
+ with open("s_line_{}.dat".format(i_bin), "w") as ofs:
708
+ for sval in numpy.linspace(1, x0*2, 100):
709
+ ofs.write("{:.4e} {:.10e} {:.10e}\n".format(sval,
710
+ target([sval]),
711
+ grad([sval])[0]))
712
+ continue
713
+ #print("Bin", i_bin)
714
+ if 1: # refine D and S iteratively
715
+ vals_last = None
716
+ for ids in range(10):
717
+ refpar = "D"
718
+ x0 = numpy.array([trans.D_inv(hkldata.binned_df.loc[i_bin, lab]) for lab in D_labs])
719
+ vals_now = []
720
+ if 0:
721
+ f0 = target(x0)
722
+ nfev_total += 1
723
+ shift = mli_shift_D(hkldata.df, fc_labs, trans.D(x0), hkldata.binned_df.loc[i_bin, "S"], k_ani, idxes)
724
+ shift /= trans.D_deriv(x0)
725
+ #if abs(shift) < 1e-3: break
726
+ for itry in range(10):
727
+ x1 = x0 + shift
728
+ if (D_trans and any(x1 < -3)) or (not D_trans and any(x1 < 5e-2)):
729
+ #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift, "BAD")
730
+ shift /= 2
731
+ continue
732
+ f1 = target(x1)
733
+ nfev_total += 1
734
+ if f1 > f0:
735
+ shift /= 2
736
+ continue
737
+ else: # good
738
+ for i, lab in enumerate(D_labs):
739
+ hkldata.binned_df.loc[i_bin, lab] = trans.D(x1[i])
740
+ vals_now.append(hkldata.binned_df.loc[i_bin, lab])
741
+ break
742
+ else:
743
+ break
744
+ else:
745
+ #print(mli_shift_D(hkldata.df, fc_labs, trans.D(x0), hkldata.binned_df.S[i_bin], k_ani, idxes))
746
+ res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
747
+ bounds=((-5 if D_trans else 1e-5, None),)*len(x0))
748
+ nfev_total += res.nfev
749
+ #print(i_bin, "mini cycle", ids, refpar)
750
+ #print(res)
751
+ for i, lab in enumerate(D_labs):
752
+ hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
753
+ vals_now.append(hkldata.binned_df.loc[i_bin, lab])
754
+ refpar = "S"
755
+ if 1:
756
+ for cyc_s in range(1):
757
+ x0 = trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])
758
+ f0 = target([x0])
759
+ Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
760
+ nfev_total += 1
761
+ calc_shift_S = mli_shift_S if use_int else mlf_shift_S
762
+ shift = calc_shift_S(hkldata.df, fc_labs, Ds, trans.S(x0), k_ani, idxes)
763
+ shift /= trans.S_deriv(x0)
764
+ if abs(shift) < 1e-3: break
765
+ for itry in range(10):
766
+ x1 = x0 + shift
767
+ if (S_trans and x1 < -3) or (not S_trans and x1 < 5e-2):
768
+ #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift, "BAD")
769
+ shift /= 2
770
+ continue
771
+ f1 = target([x1])
772
+ nfev_total += 1
773
+ if f1 > f0:
774
+ shift /= 2
775
+ continue
776
+ else: # good
777
+ #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift)
778
+ hkldata.binned_df.loc[i_bin, "S"] = trans.S(x1)
779
+ break
780
+ else:
781
+ #print("all bad")
782
+ break
783
+ else:
784
+ # somehow this does not work well.
785
+ x0 = [trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])]
786
+ res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
787
+ bounds=((-3 if S_trans else 5e-2, None),))
788
+ nfev_total += res.nfev
789
+ #print(i_bin, "mini cycle", ids, refpar)
790
+ #print(res)
791
+ hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
792
+ vals_now.append(hkldata.binned_df.loc[i_bin, "S"])
793
+ vals_now = numpy.array(vals_now)
794
+ if vals_last is not None and numpy.all(numpy.abs((vals_last - vals_now) / vals_now) < 1e-2):
795
+ #logger.writeln("converged in mini cycle {}".format(ids+1))
796
+ break
797
+ vals_last = vals_now
798
+ else:
799
+ x0 = [trans.D_inv(hkldata.binned_df.loc[i_bin, lab]) for lab in D_labs] + [trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])]
800
+ res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
801
+ bounds=((-5 if D_trans else 1e-5, None), )*len(D_labs) + ((-3 if S_trans else 5e-2, None),))
802
+ nfev_total += res.nfev
803
+ #print(i_bin)
804
+ #print(res)
805
+ for i, lab in enumerate(D_labs):
806
+ hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
807
+ hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
808
+
809
+ for i_bin, idxes in hkldata.binned():
810
+ for dlab, fclab in zip(D_labs, fc_labs):
811
+ mean_dfc = numpy.nanmean(numpy.abs(hkldata.binned_df[dlab][i_bin] * hkldata.df[fclab][idxes]))
812
+ hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc
813
+
814
+ logger.writeln("Refined estimates:")
815
+ logger.writeln(hkldata.binned_df.to_string())
816
+ logger.writeln("time: {:.1f} sec ({} evaluations)".format(time.time() - t0, nfev_total))
817
+
818
+ if not use_int:
819
+ break # did not implement MLF B_aniso optimization
820
+
821
+ # Refine b_aniso
822
+ adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=True)
823
+ SMattolist = lambda B: [B.u11, B.u22, B.u33, B.u12, B.u13, B.u23]
824
+
825
+ def target_ani(x):
826
+ b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
827
+ k_ani = hkldata.debye_waller_factors(b_cart=b)
828
+ ret = 0.
829
+ for i_bin, idxes in hkldata.binned():
830
+ Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
831
+ ret += mli(hkldata.df, fc_labs, Ds, hkldata.binned_df.loc[i_bin, "S"], k_ani, idxes)
832
+ return ret
833
+ def grad_ani(x):
834
+ b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
835
+ k_ani = hkldata.debye_waller_factors(b_cart=b)
836
+ S2mat = hkldata.ssq_mat() # ssqmat
837
+ g = numpy.zeros(6)
838
+ for i_bin, idxes in hkldata.binned():
839
+ r = integr.ll_int_der1_ani(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes],
840
+ k_ani[idxes], hkldata.binned_df.loc[i_bin, "S"],
841
+ hkldata.df[fc_labs].to_numpy()[idxes], hkldata.binned_df.loc[i_bin, D_labs],
842
+ hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes])
843
+ S2 = S2mat[:,idxes]
844
+ g += -numpy.nansum(S2 * r[:,0], axis=1) # k_ani is already multiplied in r
845
+ return numpy.dot(g, adpdirs.T)
846
+ def shift_ani(x):
847
+ b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
848
+ k_ani = hkldata.debye_waller_factors(b_cart=b)
849
+ S2mat = hkldata.ssq_mat() # ssqmat
850
+ g = numpy.zeros(6)
851
+ H = numpy.zeros((6, 6))
852
+ for i_bin, idxes in hkldata.binned():
853
+ r = integr.ll_int_der1_ani(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes],
854
+ k_ani[idxes], hkldata.binned_df.loc[i_bin, "S"],
855
+ hkldata.df[fc_labs].to_numpy()[idxes], list(hkldata.binned_df.loc[i_bin, D_labs]),
856
+ hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes])
857
+ S2 = S2mat[:,idxes]
858
+ g += -numpy.nansum(S2 * r[:,0], axis=1) # k_ani is already multiplied in r
859
+ H += numpy.nansum(numpy.matmul(S2[None,:].T, S2.T[:,None]) * (r[:,0]**2)[:,None,None], axis=0)
860
+
861
+ g, H = numpy.dot(g, adpdirs.T), numpy.dot(adpdirs, numpy.dot(H, adpdirs.T))
862
+ return -numpy.dot(g, numpy.linalg.pinv(H))
863
+
864
+ logger.writeln("Refining B_aniso. Current = {}".format(b_aniso))
865
+ if 0:
866
+ x0 = numpy.dot(SMattolist(b_aniso), numpy.linalg.pinv(adpdirs))
867
+ res = scipy.optimize.minimize(fun=target_ani, x0=x0, jac=grad_ani)
868
+ print(res)
869
+ b_aniso = gemmi.SMat33d(*numpy.dot(res.x, adpdirs))
870
+ f1 = res.fun
871
+ else:
872
+ B_converged = False
873
+ for j in range(10):
874
+ x = numpy.dot(SMattolist(b_aniso), numpy.linalg.pinv(adpdirs))
875
+ f0 = target_ani(x)
876
+ shift = shift_ani(x)
877
+ for i in range(3):
878
+ ss = shift / 2**i
879
+ f1 = target_ani(x + ss)
880
+ #logger.writeln("{:2d} f0 = {:.3e} shift = {} df = {:.3e}".format(j, f0, ss, f1 - f0))
881
+ if f1 < f0:
882
+ b_aniso = gemmi.SMat33d(*numpy.dot(x+ss, adpdirs))
883
+ if numpy.max(numpy.abs(ss)) < 1e-4: B_converged = True
884
+ break
885
+ else:
886
+ B_converged = True
887
+ if B_converged: break
888
+
889
+ logger.writeln("Refined B_aniso = {}".format(b_aniso))
890
+ logger.writeln("cycle {} f= {}".format(i_cyc, f1))
891
+
892
+ smooth_params(hkldata, D_labs, smoothing)
893
+ return b_aniso
894
+ # determine_ml_params()
895
+
896
def smooth_params(hkldata, D_labs, smoothing):
    """Spread per-bin D and S estimates onto every reflection in hkldata.df.

    With smoothing=None (or fewer than two bins) the per-bin values are
    copied as-is; with "gauss" they are Gaussian-smoothed along 1/d^2.
    Any other value raises RuntimeError.
    """
    labels = D_labs + ["S"]
    if smoothing is None or len(hkldata.binned()) < 2:
        # No smoothing: each reflection takes its bin's value directly.
        for lab in labels:
            hkldata.df[lab] = hkldata.binned_data_as_array(lab)
    elif smoothing == "gauss":
        # Bin centers in 1/d^2; kernel width taken from the first bin spacing.
        centers = (0.5 / hkldata.binned_df[["d_min", "d_max"]]**2).sum(axis=1).to_numpy()
        smoothed = ext.smooth_gauss(centers,
                                    hkldata.binned_df[labels].to_numpy(),
                                    1./hkldata.df.d.to_numpy()**2,
                                    100, # min(n_ref?)
                                    (centers[1] - centers[0]))
        for col, lab in enumerate(labels):
            hkldata.df[lab] = smoothed[:, col]
        # NOTE: the smoothened averages are not written back to binned_df,
        # although that would affect the next refinement.
        # TODO: update Mn(|Dj*FCj|) as well.
    else:
        raise RuntimeError("unknown smoothing method: {}".format(smoothing))
# smooth_params()
918
+
919
def expected_F_from_int(Io, sigIo, k_ani, DFc, eps, c, S):
    """Expected amplitude and a FOM proxy from observed intensities.

    c is 0 for acentric and 1 for centric reflections; S is the bin
    variance, eps the reflection epsilon factor.  Returns (f, m_proxy)
    where m_proxy approximates |<F>| / <|F|>.
    """
    acentric = (c == 0)
    k_num = 0.5 if acentric else 0.
    k_den = 0. if acentric else -0.5
    sqrt_sig = numpy.sqrt(sigIo)
    # Dimensionless arguments of the intensity-based integrals.
    to = Io / sigIo - sigIo / (c+1) / k_ani**2 / S / eps
    tf = k_ani * numpy.abs(DFc) / sqrt_sig
    sig1 = k_ani**2 * S * eps / sigIo
    f = ext.integ_J_ratio(k_num, k_den, True, to, tf, sig1, c+1,
                          integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
    f *= sqrt_sig / k_ani  # back to absolute scale, anisotropy removed
    m_proxy = ext.integ_J_ratio(k_num, k_num, True, to, tf, sig1, c+1,
                                integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
    return f, m_proxy
# expected_F_from_int()
932
+
933
def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections, use="all"):
    """Compute map coefficients (FWT, DELFWT, FOM, optionally FAN) from intensities.

    Fills columns of hkldata.df in place using expected amplitudes derived
    from I/SIGI via expected_F_from_int.  ``use`` controls which reflections
    contribute: reflections outside the chosen set are blanked and then
    filled with DFc.
    """
    nmodels = len(fc_labs)
    hkldata.df["FWT"] = 0j * numpy.nan
    hkldata.df["DELFWT"] = 0j * numpy.nan
    hkldata.df["FOM"] = numpy.nan # FOM proxy, |<F>| / <|F|>
    # Anomalous map only possible when both Friedel mates are present.
    has_ano = "I(+)" in hkldata.df and "I(-)" in hkldata.df
    if has_ano:
        hkldata.df["FAN"] = 0j * numpy.nan
        ano_data = hkldata.df[["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]].to_numpy()
    Io = hkldata.df.I.to_numpy()
    sigIo = hkldata.df.SIGI.to_numpy()
    k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
    eps = hkldata.df.epsilon.to_numpy()
    # D_j * Fc_j summed over models gives the total DFc per reflection.
    Ds = numpy.vstack([hkldata.df[lab].to_numpy() for lab in D_labs]).T
    Fcs = numpy.vstack([hkldata.df[lab].to_numpy() for lab in fc_labs]).T
    DFc = (Ds * Fcs).sum(axis=1)
    hkldata.df["DFC"] = DFc
    for i_bin, idxes in hkldata.binned():
        for c, work, test in centric_and_selections[i_bin]:
            cidxes = numpy.concatenate([work, test])
            S = hkldata.df["S"].to_numpy()[cidxes]
            f, m_proxy = expected_F_from_int(Io[cidxes], sigIo[cidxes], k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
            # Phase taken from DFc.
            exp_ip = numpy.exp(numpy.angle(DFc[cidxes])*1j)
            if c == 0:
                # acentric: 2mFo-DFc style coefficient
                hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
            else:
                # centric: mFo style coefficient
                hkldata.df.loc[cidxes, "FWT"] = f * exp_ip
            hkldata.df.loc[cidxes, "DELFWT"] = f * exp_ip - DFc[cidxes]
            hkldata.df.loc[cidxes, "FOM"] = m_proxy
            if has_ano:
                f_p, _ = expected_F_from_int(ano_data[cidxes,0], ano_data[cidxes,1],
                                             k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
                f_m, _ = expected_F_from_int(ano_data[cidxes,2], ano_data[cidxes,3],
                                             k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
                hkldata.df.loc[cidxes, "FAN"] = (f_p - f_m) * exp_ip / 2j
            # remove reflections that should be hidden
            if use != "all":
                # usually use == "work"
                tohide = test if use == "work" else work
                hkldata.df.loc[tohide, "FWT"] = 0j * numpy.nan
                hkldata.df.loc[tohide, "DELFWT"] = 0j * numpy.nan
            # Fill blanked/missing FWT with DFc so maps have no holes.
            fill_sel = numpy.isnan(hkldata.df["FWT"][cidxes].to_numpy())
            hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]
# calculate_maps_int()
977
+
978
def merge_models(sts): # simply merge models. no fix in chain ids etc.
    """Merge all chains of all models in *sts* into one single-model structure.

    Returns a new structure cloned from sts[0] (keeping its cell and
    symmetry metadata) whose single model "1" contains every chain of
    every model of every input structure.  Chain IDs are copied verbatim
    and may clash.  The input structures are left unmodified.
    """
    merged = sts[0].clone()
    del merged[:]  # drop the cloned models; keep cell/symmetry metadata
    model = gemmi.Model("1")
    for st in sts:
        for m in st:
            for c in m:
                model.add_chain(c)
    # BUG FIX: previously the loop variable shadowed the clone ("st"), so the
    # merged model was appended to the last *input* structure, which was then
    # returned (mutating the caller's object) while the clone was discarded.
    merged.add_model(model)
    return merged
# merge_models()
989
+
990
def decide_mtz_labels(mtz, find_free=True):
    """Automatically choose data (and optionally free-flag) columns of an MTZ.

    Preference order: amplitudes (F) > intensities (J) > anomalous
    amplitudes (G) > anomalous intensities (K).  Returns the chosen label
    list; raises RuntimeError when no usable data columns are present.
    """
    dlabs = utils.hkl.mtz_find_data_columns(mtz)
    logger.writeln("Finding possible options from MTZ:")
    for typ in dlabs:
        for labs in dlabs[typ]:
            logger.writeln(" --labin '{}'".format(",".join(labs)))
    labin = None
    for typ in ("F", "J", "G", "K"): # F is preferred for now
        if dlabs[typ]:
            labin = dlabs[typ][0]
            break
    if labin is None:
        raise RuntimeError("Data not found from mtz")
    if find_free:
        flabs = utils.hkl.mtz_find_free_columns(mtz)
        if flabs:
            labin += [flabs[0]]
    logger.writeln("MTZ columns automatically selected: {}".format(labin))
    return labin
# decide_mtz_labels()
1013
+
1014
def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=None,
                  n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False):
    """Read reflection data and model(s) and prepare them for ML analysis.

    Reads hklin (gemmi.Mtz object, mmCIF/MTZ file, or small-molecule file),
    harmonizes cell/space group with the models, sets up resolution binning,
    calculates per-model Fc columns, and builds a centric/work/test selection
    table per bin.  Returns (hkldata, sts, fc_labs, centric_and_selections,
    free).  Raises RuntimeError on unusable input.
    """
    if labin: assert 1 < len(labin) < 6
    assert use in ("all", "work", "test")
    assert n_bins or n_per_bin #if n_bins not set, n_per_bin should be given

    if type(hklin) is gemmi.Mtz or utils.fileio.is_mmhkl_file(hklin):
        if type(hklin) is gemmi.Mtz:
            mtz = hklin
        else:
            mtz = utils.fileio.read_mmhkl(hklin, cif_index=cif_index)
        sts = [utils.fileio.read_structure(f) for f in xyzins]
    else:
        # Small-molecule route: hkl and coordinates read together.
        assert len(xyzins) == 1
        st, mtz = utils.fileio.read_small_molecule_files([hklin, xyzins[0]])
        sts = [st]

    if not labin:
        labin = decide_mtz_labels(mtz)
    col_types = {x.label:x.type for x in mtz.columns}
    if labin[0] not in col_types:
        raise RuntimeError("MTZ column not found: {}".format(labin[0]))
    # Map first column's MTZ type to canonical label names and required types.
    labs_and_types = {"F": ("amplitude", ["FP","SIGFP"], ["F", "Q"]),
                      "J": ("intensity", ["I","SIGI"], ["J", "Q"]),
                      "G": ("anomalous amplitude", ["F(+)","SIGF(+)", "F(-)", "SIGF(-)"], ["G", "L", "G", "L"]),
                      "K": ("anomalous intensity", ["I(+)","SIGI(+)", "I(-)", "SIGI(-)"], ["K", "M", "K", "M"])}
    if col_types[labin[0]] not in labs_and_types:
        raise RuntimeError("MTZ column {} is neither amplitude nor intensity".format(labin[0]))
    name, newlabels, require_types = labs_and_types[col_types[labin[0]]]
    logger.writeln("Observation type: {}".format(name))
    if len(newlabels) < len(labin): newlabels.append("FREE")
    hkldata = utils.hkl.hkldata_from_mtz(mtz, labin, newlabels=newlabels, require_types=require_types)
    # Anomalous pairs are merged to mean FP/SIGFP or I/SIGI; the (+)/(-)
    # columns are kept for later anomalous map calculation.
    if newlabels[0] == "F(+)":
        hkldata.merge_anomalous(newlabels[:4], ["FP", "SIGFP"])
        newlabels = ["FP", "SIGFP"] + newlabels[4:]
    elif newlabels[0] == "I(+)":
        hkldata.merge_anomalous(newlabels[:4], ["I", "SIGI"])
        newlabels = ["I", "SIGI"] + newlabels[4:]

    if hkldata.df.empty:
        raise RuntimeError("No data in hkl data")

    if sts:
        assert source in ["electron", "xray", "neutron"]
        for st in sts:
            if st[0].count_atom_sites() == 0:
                raise RuntimeError("No atom in the model")
        if not hkldata.cell.approx(sts[0].cell, 1e-3):
            logger.writeln("Warning: unit cell mismatch between model and reflection data")
            logger.writeln(" using unit cell from mtz")

        for st in sts: st.cell = hkldata.cell # mtz cell is used in any case

        sg_st = sts[0].find_spacegroup() # may be None
        sg_use = hkldata.sg
        if hkldata.sg != sg_st:
            # Different Laue class is a hard error; same Laue class is a
            # recoverable mismatch resolved in favour of the model.
            if st.cell.is_crystal() and sg_st and sg_st.laue_str() != hkldata.sg.laue_str():
                raise RuntimeError("Crystal symmetry mismatch between model and data")
            logger.writeln("Warning: space group mismatch between model and mtz")
            if sg_st and sg_st.laue_str() == hkldata.sg.laue_str():
                logger.writeln(" using space group from model")
                sg_use = sg_st
            else:
                logger.writeln(" using space group from mtz")
            logger.writeln("")

        for st in sts:
            if st.find_spacegroup() != sg_use:
                st.spacegroup_hm = sg_use.xhm()
            st.setup_cell_images()
        hkldata.sg = sg_use

        if not keep_charges:
            utils.model.remove_charge(sts)
        utils.model.check_atomsf(sts, source)

    if newlabels[0] == "FP":
        # Amplitudes must be positive; sigmas must be positive in all cases.
        hkldata.remove_nonpositive(newlabels[0])
    hkldata.remove_nonpositive(newlabels[1])
    hkldata.switch_to_asu()
    hkldata.remove_systematic_absences()
    #hkldata.df = hkldata.df.astype({name: 'float64' for name in ["I","SIGI","FP","SIGFP"] if name in hkldata.df})

    # Apply resolution cutoffs only when at least one limit was given.
    if (d_min, d_max).count(None) != 2:
        hkldata = hkldata.copy(d_min=d_min, d_max=d_max)
        if hkldata.df.empty:
            raise RuntimeError("No data left in hkl data")
    d_min, d_max = hkldata.d_min_max()

    hkldata.complete()
    hkldata.sort_by_resolution()
    hkldata.calc_epsilon()
    hkldata.calc_centric()

    if "FREE" in hkldata.df and free is None:
        free = hkldata.guess_free_number(newlabels[0])

    if n_bins is None:
        # Decide the bin count from the requested reflections-per-bin in the
        # subset actually used (all / work / test).
        sel = hkldata.df[newlabels[0]].notna()
        if use == "work":
            sel &= hkldata.df.FREE != free
        elif use == "test":
            sel &= hkldata.df.FREE == free
        s_array = 1/hkldata.d_spacings()[sel]
        if len(s_array) == 0:
            raise RuntimeError("no reflections in {} set".format(use))
        n_bins = utils.hkl.decide_n_bins(n_per_bin, s_array, max_bins=max_bins)
        logger.writeln("n_per_bin={} requested for {}. n_bins set to {}".format(n_per_bin, use, n_bins))

    hkldata.setup_binning(n_bins=n_bins)
    logger.writeln("Data completeness: {:.2f}%".format(hkldata.completeness()*100.))

    # One Fc column per input model.
    fc_labs = []
    for i, st in enumerate(sts):
        lab = "FC{}".format(i)
        hkldata.df[lab] = utils.model.calc_fc_fft(st, d_min-1e-6,
                                                  source=source, mott_bethe=(source=="electron"),
                                                  miller_array=hkldata.miller_array())
        fc_labs.append(lab)

    # Create a centric selection table for faster look up
    centric_and_selections = {}
    stats = hkldata.binned_df.copy()
    stats["n_all"] = 0
    stats["n_obs"] = 0
    stats[newlabels[0]] = numpy.nan
    snr = "I/sigma" if newlabels[0] == "I" else "F/sigma"
    stats[snr] = numpy.nan
    if newlabels[0] == "I":
        stats["Mn(I)/Std(I)"] = numpy.nan
    if "FREE" in hkldata.df:
        stats["n_work"] = 0
        stats["n_test"] = 0

    for i_bin, idxes in hkldata.binned():
        centric_and_selections[i_bin] = []
        n_obs = 0
        n_work, n_test = 0, 0
        # Split each bin by centric flag, then into work/test indices.
        for c, g2 in hkldata.df.loc[idxes].groupby("centric", sort=False):
            valid_sel = numpy.isfinite(g2[newlabels[0]])
            if "FREE" in g2:
                test_sel = (g2.FREE == free).fillna(False)
                test = g2.index[test_sel]
                work = g2.index[~test_sel]
                n_work += (valid_sel & ~test_sel).sum()
                n_test += (valid_sel & test_sel).sum()
            else:
                work = g2.index
                test = type(work)([], dtype=work.dtype)
            centric_and_selections[i_bin].append((c, work, test))
            n_obs += numpy.sum(valid_sel)

        stats.loc[i_bin, "n_obs"] = n_obs
        stats.loc[i_bin, "n_all"] = len(idxes)
        obs = hkldata.df[newlabels[0]].to_numpy()[idxes]
        sigma = hkldata.df[newlabels[1]].to_numpy()[idxes]
        if n_obs > 0:
            stats.loc[i_bin, snr] = numpy.nanmean(obs / sigma)
            mean_obs = numpy.nanmean(obs)
            stats.loc[i_bin, newlabels[0]] = mean_obs
            if newlabels[0] == "I":
                stats.loc[i_bin, "Mn(I)/Std(I)"] = mean_obs / numpy.nanstd(obs)
        if "FREE" in hkldata.df:
            stats.loc[i_bin, "n_work"] = n_work
            stats.loc[i_bin, "n_test"] = n_test

    stats["completeness"] = stats["n_obs"] / stats["n_all"] * 100
    logger.writeln(stats.to_string())
    return hkldata, sts, fc_labs, centric_and_selections, free
# process_input()
1184
+
1185
def calc_Fmask(st, d_min, miller_array):
    """Structure factors of the flat bulk-solvent mask for *miller_array*.

    A solvent mask (Refmac atomic radii) is placed on a grid fine enough
    for d_min (at most 0.4 A spacing) and Fourier transformed.
    """
    logger.writeln("Calculating solvent contribution..")
    mask_grid = gemmi.FloatGrid()
    cell_lengths = st.cell.parameters[:3]
    recip_lengths = st.cell.reciprocal().parameters[:3]
    # Grid spacing that oversamples each axis beyond d_min.
    spacings = [1 / (2 * a / d_min + 1) / a_star
                for a, a_star in zip(cell_lengths, recip_lengths)]
    mask_grid.setup_from(st, spacing=min(0.4, min(spacings)))
    masker = gemmi.SolventMasker(gemmi.AtomicRadiiSet.Refmac)
    masker.put_mask_on_float_grid(mask_grid, st[0])
    asu_data = gemmi.transform_map_to_f_phi(mask_grid)
    return asu_data.get_value_by_hkl(miller_array)
# calc_Fmask()
1197
+
1198
def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None):
    """Determine bulk-solvent contribution and overall LSQ scales, apply them in place.

    Scales observed columns by 1/k_overall (squared for intensities),
    multiplies all Fc columns by the isotropic Debye-Waller factor, adds a
    "Fbulk" column (and appends it to fc_labs — callers rely on this
    mutation) when use_solvent, and stores the total "FC" column and a
    "k_aniso" column.  Returns the LsqScale object.
    """
    fc_list = [hkldata.df[fc_labs].sum(axis=1).to_numpy()]
    if use_solvent:
        if mask is None:
            Fmask = calc_Fmask(merge_models(sts), hkldata.d_min_max()[0] - 1e-6, hkldata.miller_array())
        else:
            # User-provided mask map: just transform it.
            fmask_gr = gemmi.transform_map_to_f_phi(mask)
            Fmask = fmask_gr.get_value_by_hkl(hkldata.miller_array())
        fc_list.append(Fmask)

    scaling = LsqScale()
    scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0)
    scaling.scale()
    b_iso = scaling.b_iso
    k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
    k_aniso = hkldata.debye_waller_factors(b_cart=scaling.b_aniso)
    hkldata.df["k_aniso"] = k_aniso # we need it later when calculating stats

    if use_solvent:
        fc_labs.append("Fbulk")
        solvent_scale = scaling.get_solvent_scale(scaling.k_sol, scaling.b_sol,
                                                  1. / hkldata.d_spacings().to_numpy()**2)
        hkldata.df[fc_labs[-1]] = Fmask * solvent_scale

    # Apply scales
    if use_int:
        # in intensity case, we try to refine b_aniso with ML. perhaps we should do it in amplitude case also
        o_labs = ["I", "SIGI", "I(+)","SIGI(+)", "I(-)", "SIGI(-)"]
        hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall**2
    else:
        o_labs = ["FP", "SIGFP", "F(+)","SIGF(+)", "F(-)", "SIGF(-)"]
        hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall
    for lab in fc_labs: hkldata.df[lab] *= k_iso
    # total Fc
    hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
    return scaling
# bulk_solvent_and_lsq_scales()
1235
+
1236
def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out, use="all"):
    """Compute map coefficients from amplitudes and write per-bin statistics.

    Fills FWT (2mFo-DFc / mFo), DELFWT (mFo-DFc), FOM, X and optionally FAN
    columns of hkldata.df in place, then writes a loggraph-style statistics
    table to *log_out*.  ``use`` controls which reflections contribute;
    hidden/missing ones are filled with DFc.
    """
    nmodels = len(fc_labs)
    hkldata.df["FWT"] = 0j * numpy.nan
    hkldata.df["DELFWT"] = 0j * numpy.nan
    hkldata.df["FOM"] = numpy.nan
    hkldata.df["X"] = numpy.nan # for FOM
    has_ano = "F(+)" in hkldata.df and "F(-)" in hkldata.df
    if has_ano:
        hkldata.df["FAN"] = 0j * numpy.nan
    stats_data = []
    k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
    # D_j * Fc_j summed over models gives the total DFc per reflection.
    Ds = numpy.vstack([hkldata.df[lab].to_numpy() for lab in D_labs]).T
    Fcs = numpy.vstack([hkldata.df[lab].to_numpy() for lab in fc_labs]).T
    DFc = (Ds * Fcs).sum(axis=1)
    hkldata.df["DFC"] = DFc
    for i_bin, idxes in hkldata.binned():
        bin_d_min = hkldata.binned_df.d_min[i_bin]
        bin_d_max = hkldata.binned_df.d_max[i_bin]
        # 0: acentric 1: centric
        mean_fom = [numpy.nan, numpy.nan]
        nrefs = [0, 0]
        for c, work, test in centric_and_selections[i_bin]:
            cidxes = numpy.concatenate([work, test])
            S = hkldata.df["S"].to_numpy()[cidxes]
            expip = numpy.exp(numpy.angle(DFc[cidxes])*1j)
            # Observations corrected for anisotropy before the ML formulas.
            Fo = hkldata.df.FP.to_numpy()[cidxes] / k_ani[cidxes]
            SigFo = hkldata.df.SIGFP.to_numpy()[cidxes] / k_ani[cidxes]
            epsilon = hkldata.df.epsilon.to_numpy()[cidxes]
            nrefs[c] = numpy.sum(numpy.isfinite(Fo))
            DFc_abs = numpy.abs(DFc[cidxes])
            if c == 0:
                # acentric: m = I1(X)/I0(X)
                Sigma = 2 * SigFo**2 + epsilon * S
                X = 2 * Fo * DFc_abs / Sigma
                m = gemmi.bessel_i1_over_i0(X)
                hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
            else:
                # centric: m = tanh(X)
                Sigma = SigFo**2 + epsilon * S
                X = Fo * DFc_abs / Sigma
                m = numpy.tanh(X)
                hkldata.df.loc[cidxes, "FWT"] = (m * Fo) * expip

            hkldata.df.loc[cidxes, "DELFWT"] = (m * Fo - DFc_abs) * expip
            hkldata.df.loc[cidxes, "FOM"] = m
            hkldata.df.loc[cidxes, "X"] = X
            if has_ano:
                Fo_dano = (hkldata.df["F(+)"].to_numpy()[cidxes] - hkldata.df["F(-)"].to_numpy()[cidxes]) / k_ani[cidxes]
                hkldata.df.loc[cidxes, "FAN"] = m * Fo_dano * expip / 2j
            if nrefs[c] > 0: mean_fom[c] = numpy.nanmean(m)

            # remove reflections that should be hidden
            if use != "all":
                # usually use == "work"
                tohide = test if use == "work" else work
                hkldata.df.loc[tohide, "FWT"] = 0j * numpy.nan
                hkldata.df.loc[tohide, "DELFWT"] = 0j * numpy.nan
            # Fill blanked/missing FWT with DFc so maps have no holes.
            fill_sel = numpy.isnan(hkldata.df["FWT"][cidxes].to_numpy())
            hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]

        # Per-bin statistics: R, CC, mean FOMs, log-mean squared amplitudes.
        Fc = hkldata.df.FC.to_numpy()[idxes] * k_ani[idxes]
        Fo = hkldata.df.FP.to_numpy()[idxes]
        mean_DFc2 = numpy.nanmean(numpy.abs((Ds[idxes,:] * Fcs[idxes,:]).sum(axis=1) * k_ani[idxes])**2)
        mean_log_DFcs = numpy.log(numpy.nanmean(numpy.abs(Ds[idxes,:] * Fcs[idxes,:] * k_ani[idxes,None]), axis=0)).tolist()
        mean_Ds = numpy.nanmean(Ds[idxes,:], axis=0).tolist()
        if sum(nrefs) > 0:
            r = numpy.nansum(numpy.abs(numpy.abs(Fc)-Fo)) / numpy.nansum(Fo)
            cc = utils.hkl.correlation(Fo, numpy.abs(Fc))
            mean_Fo2 = numpy.nanmean(numpy.abs(Fo)**2)
        else:
            r, cc, mean_Fo2 = numpy.nan, numpy.nan, numpy.nan
        stats_data.append([i_bin, nrefs[0], nrefs[1], bin_d_max, bin_d_min,
                           numpy.log(mean_Fo2),
                           numpy.log(numpy.nanmean(numpy.abs(Fc)**2)),
                           numpy.log(mean_DFc2),
                           numpy.log(numpy.mean(hkldata.df["S"].to_numpy()[idxes])),
                           mean_fom[0], mean_fom[1], r, cc] + mean_Ds + mean_log_DFcs)

    DFc_labs = ["log(Mn(|{}{}|))".format(dl,fl) for dl,fl in zip(D_labs, fc_labs)]
    cols = ["bin", "n_a", "n_c", "d_max", "d_min",
            "log(Mn(|Fo|^2))", "log(Mn(|Fc|^2))", "log(Mn(|DFc|^2))",
            "log(Sigma)", "FOM_a", "FOM_c", "R", "CC(|Fo|,|Fc|)"] + D_labs + DFc_labs
    stats = pandas.DataFrame(stats_data, columns=cols)
    title_labs = [["log(Mn(|F|^2)) and variances", ["log(Mn(|Fo|^2))", "log(Mn(|Fc|^2))", "log(Mn(|DFc|^2))", "log(Sigma)"]],
                  ["FOM", ["FOM_a", "FOM_c"]],
                  ["D", D_labs],
                  ["DFc", DFc_labs],
                  ["R-factor", ["R"]],
                  ["CC", ["CC(|Fo|,|Fc|)"]],
                  ["number of reflections", ["n_a", "n_c"]]]
    with open(log_out, "w") as ofs:
        ofs.write(utils.make_loggraph_str(stats, main_title="Statistics",
                                          title_labs=title_labs,
                                          s2=1/stats["d_min"]**2))
    logger.writeln("output log: {}".format(log_out))
# calculate_maps()
1330
+
1331
def main(args):
    """Entry point: scale data, estimate ML parameters, and write map coefficients.

    Expects an argparse namespace (hklin, labin, nbins, free, model, source,
    d_max/d_min, use, mask, no_solvent, use_cc, D_trans, S_trans,
    output_prefix, keep_charges).  Writes <output_prefix>.mtz and returns
    the processed hkldata.
    """
    # Target reflections per bin; the test set is much smaller.
    n_per_bin = {"all": 500, "work": 500, "test": 50}[args.use]
    try:
        hkldata, sts, fc_labs, centric_and_selections,free = process_input(hklin=args.hklin,
                                                                           labin=args.labin.split(",") if args.labin else None,
                                                                           n_bins=args.nbins,
                                                                           free=args.free,
                                                                           xyzins=sum(args.model, []),
                                                                           source=args.source,
                                                                           d_max=args.d_max,
                                                                           d_min=args.d_min,
                                                                           n_per_bin=n_per_bin,
                                                                           use=args.use,
                                                                           max_bins=30,
                                                                           keep_charges=args.keep_charges)
    except RuntimeError as e:
        raise SystemExit("Error: {}".format(e))

    # Intensity-based run when merged I column exists.
    is_int = "I" in hkldata.df

    if args.mask:
        mask = utils.fileio.read_ccp4_map(args.mask)[0]
    else:
        mask = None

    # Overall scaling & bulk solvent
    # FP/SIGFP will be scaled. Total FC will be added.
    lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
                                      use_int=is_int, mask=mask)
    b_aniso = lsq.b_aniso
    # stats
    stats, overall = calc_r_and_cc(hkldata, centric_and_selections)
    for lab in "R", "CC":
        logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))

    # Estimate ML parameters
    D_labs = ["D{}".format(i) for i in range(len(fc_labs))]

    if args.use_cc:
        assert not is_int
        logger.writeln("Estimating sigma-A parameters from CC..")
        determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, args.use)
    else:
        b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, centric_and_selections, args.D_trans, args.S_trans, args.use)
    # Map coefficients: with use="test" the maps are still computed from the
    # work set only ("test" -> "work" below).
    if is_int:
        calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections,
                           use={"all": "all", "work": "work", "test": "work"}[args.use])
    else:
        log_out = "{}.log".format(args.output_prefix)
        calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out,
                       use={"all": "all", "work": "work", "test": "work"}[args.use])

    # Write mtz file
    if is_int:
        labs = ["I", "SIGI", "FOM"]
    else:
        labs = ["FP", "SIGFP", "FOM"]
    labs.extend(["FWT", "DELFWT", "FC", "DFC"])
    if "FAN" in hkldata.df:
        labs.append("FAN")
    if not args.no_solvent:
        labs.append("Fbulk")
    if "FREE" in hkldata.df:
        labs.append("FREE")
    labs += D_labs + ["S"]
    mtz_out = args.output_prefix+".mtz"
    hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q"})
    return hkldata
# main()
1400
if __name__ == "__main__":
    # Script entry: parse command-line arguments and run the pipeline.
    import sys
    args = parse_args(sys.argv[1:])
    main(args)