servalcat 0.4.99__cp313-cp313-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of servalcat might be problematic.

Files changed (45)
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-313-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +906 -0
  7. servalcat/refine/refine_geom.py +233 -0
  8. servalcat/refine/refine_spa.py +366 -0
  9. servalcat/refine/refine_xtal.py +281 -0
  10. servalcat/refine/spa.py +144 -0
  11. servalcat/refine/xtal.py +276 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +395 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +479 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +977 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1547 -0
  27. servalcat/utils/fileio.py +744 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +714 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +781 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +258 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1644 -0
  40. servalcat/xtal/twin.py +121 -0
  41. servalcat-0.4.99.dist-info/METADATA +55 -0
  42. servalcat-0.4.99.dist-info/RECORD +45 -0
  43. servalcat-0.4.99.dist-info/WHEEL +5 -0
  44. servalcat-0.4.99.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,1644 @@
+ """
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
+ MRC Laboratory of Molecular Biology
+
+ This software is released under the
+ Mozilla Public License, version 2.0; see LICENSE.
+ """
+ from __future__ import absolute_import, division, print_function, generators
+ import argparse
+ import gemmi
+ import numpy
+ import pandas
+ import itertools
+ import time
+ import scipy.special
+ import scipy.optimize
+ from servalcat.utils import logger
+ from servalcat import utils
+ from servalcat import ext
+ from servalcat.xtal.twin import find_twin_domains_from_data, estimate_twin_fractions_from_model
+
+ """
+ DFc = sum_j D_j F_c,j
+ The last Fc,n is the bulk solvent contribution.
+ """
+
+ integr = ext.IntensityIntegrator()
+
+ def add_arguments(parser):
+     parser.description = 'Sigma-A parameter estimation for crystallographic data'
+     parser.add_argument('--hklin', required=True,
+                         help='Input MTZ file')
+     parser.add_argument('--spacegroup',
+                         help='Override space group')
+     parser.add_argument('--labin',
+                         help='MTZ columns for F,SIGF,FREE')
+     parser.add_argument('--free', type=int,
+                         help='Flag number for the test set')
+     parser.add_argument('--model', required=True, nargs="+", action="append",
+                         help='Input atomic model file(s)')
+     parser.add_argument("-d", '--d_min', type=float)
+     parser.add_argument('--d_max', type=float)
+     parser.add_argument('--nbins', type=int,
+                         help="Number of bins (default: auto)")
+     parser.add_argument('-s', '--source', choices=["electron", "xray", "neutron"], required=True,
+                         help="Scattering factor choice")
+     parser.add_argument('--D_trans', choices=["exp", "splus"],
+                         help="Estimate D with a positivity constraint")
+     parser.add_argument('--S_trans', choices=["exp", "splus"],
+                         help="Estimate the variance of the unexplained signal with a positivity constraint")
+     parser.add_argument('--no_solvent', action='store_true',
+                         help="Do not consider bulk solvent contribution")
+     parser.add_argument('--use_cc', action='store_true',
+                         help="Use CC(|F1|,|F2|) to CC(F1,F2) conversion to derive D and S")
+     parser.add_argument('--use', choices=["all", "work", "test"], default="all",
+                         help="Which reflections are used for the parameter estimation")
+     parser.add_argument('--twin', action="store_true", help="Turn on twin refinement")
+     parser.add_argument('--mask',
+                         help="A solvent mask (by default calculated from the coordinates)")
+     parser.add_argument('--keep_charges', action='store_true',
+                         help="Use scattering factors for charged atoms. Use with care.")
+     parser.add_argument('-o', '--output_prefix', default="sigmaa",
+                         help='Output file name prefix (default: %(default)s)')
+ # add_arguments()
+
+ def parse_args(arg_list):
+     parser = argparse.ArgumentParser()
+     add_arguments(parser)
+     return parser.parse_args(arg_list)
+ # parse_args()
+
+ def nanaverage(cc, w):
+     sel = ~numpy.isnan(cc)
+     if numpy.sum(w[sel]) == 0:
+         return numpy.nan
+     return numpy.average(cc[sel], weights=w[sel])
+
+ def calc_r_and_cc(hkldata, centric_and_selections, twin_data=None):
+     has_int = "I" in hkldata.df
+     has_free = "FREE" in hkldata.df
+     stats = hkldata.binned_df.copy()
+     stats[["n_obs", "n_all"]] = 0
+     if has_free:
+         stats[["n_work", "n_free"]] = 0
+     rlab = "R1" if has_int else "R"
+     cclab = "CCI" if has_int else "CCF"
+     if twin_data:
+         Fc = numpy.sqrt(twin_data.i_calc_twin())
+     else:
+         Fc = numpy.abs(hkldata.df.FC * hkldata.df.k_aniso)
+     if has_int:
+         obs = hkldata.df.I
+         obs_sqrt = numpy.sqrt(numpy.maximum(0, hkldata.df.I))
+         obs_sqrt[hkldata.df.I/hkldata.df.SIGI < 2] = numpy.nan # SHELX equivalent
+         calc = Fc**2
+         calc_sqrt = Fc
+     else:
+         obs = obs_sqrt = hkldata.df.FP
+         calc = calc_sqrt = Fc
+     if has_free:
+         for lab in (cclab, rlab):
+             for suf in ("work", "free"):
+                 stats[lab+suf] = numpy.nan
+     else:
+         stats[cclab] = numpy.nan
+         stats[rlab] = numpy.nan
+
+     for i_bin, idxes in hkldata.binned():
+         stats.loc[i_bin, "n_obs"] = numpy.sum(numpy.isfinite(obs[idxes]))
+         stats.loc[i_bin, "n_all"] = len(idxes)
+         if has_free:
+             for j, suf in ((1, "work"), (2, "free")):
+                 idxes2 = numpy.concatenate([sel[j] for sel in centric_and_selections[i_bin]])
+                 stats.loc[i_bin, "n_"+suf] = numpy.sum(numpy.isfinite(obs[idxes2]))
+                 stats.loc[i_bin, cclab+suf] = utils.hkl.correlation(obs[idxes2], calc[idxes2])
+                 stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes2], calc_sqrt[idxes2])
+         else:
+             stats.loc[i_bin, cclab] = utils.hkl.correlation(obs[idxes], calc[idxes])
+             stats.loc[i_bin, rlab] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
+
+     # Overall
+     ret = {}
+     if has_free:
+         for suf in ("work", "free"):
+             ret[cclab+suf+"avg"] = nanaverage(stats[cclab+suf], stats["n_"+suf])
+         for j, suf in ((1, "work"), (2, "free")):
+             idxes = numpy.concatenate([sel[j] for i_bin, _ in hkldata.binned() for sel in centric_and_selections[i_bin]])
+             ret[rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
+     else:
+         ret[cclab+"avg"] = nanaverage(stats[cclab], stats["n_obs"])
+         ret[rlab] = utils.hkl.r_factor(obs, calc)
+
+     return stats, ret
+ # calc_r_and_cc()
+
+ class VarTrans:
+     def __init__(self, D_trans, S_trans):
+         # splus (softplus) appears to be better than exp
+         # exp sometimes results in too large parameter value
+         trans_funcs = {"exp": (numpy.exp, # D = f(x)
+                                numpy.exp, # dD/dx
+                                numpy.log), # x = f^-1(D)
+                        "splus": (lambda x: numpy.logaddexp(0, x),
+                                  scipy.special.expit, # lambda x: 1. / (1. + numpy.exp(-x))
+                                  lambda x: x + numpy.log(-numpy.expm1(-x))),
+                        None: (lambda x: x,
+                               lambda x: 1,
+                               lambda x: x)}
+
+         self.D, self.D_deriv, self.D_inv = trans_funcs[D_trans]
+         self.S, self.S_deriv, self.S_inv = trans_funcs[S_trans]
+ # class VarTrans
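+ # A brief check of the "splus" transform above: f(x) = log(1 + exp(x))
+ # (softplus), f'(x) = expit(x) = 1 / (1 + exp(-x)), and the inverse
+ # f^-1(y) = log(exp(y) - 1) = y + log(1 - exp(-y)), which is exactly what
+ # x + numpy.log(-numpy.expm1(-x)) computes in a numerically stable way.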
+
+ class LsqScale:
+     # parameter x = [k_overall, adp_pars, k_sol, B_sol]
+     def __init__(self, k_as_exp=False, func_type="log_cosh"):
+         assert func_type in ("sq", "log_cosh")
+         self.k_trans = lambda x: numpy.exp(x) if k_as_exp else x
+         self.k_trans_der = lambda x: numpy.exp(x) if k_as_exp else 1
+         self.k_trans_inv = lambda x: numpy.log(x) if k_as_exp else x
+         self.func_type = func_type
+         self.reset()
+
+     def reset(self):
+         self.k_sol = 0.35 # same default as gemmi/scaling.hpp # refmac seems to use 0.33 and 100? SCALE_LS_PART
+         self.b_sol = 46.
+         self.k_overall = None
+         self.b_iso = None
+         self.b_aniso = None
+         self.stats = {}
+
+     def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None, twin_data=None):
+         assert 0 < len(fc_list) < 3
+         self.use_int = use_int
+         if sigma_cutoff is not None:
+             if use_int:
+                 self.sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
+                 self.labcut = "(I/SIGI>{})".format(sigma_cutoff)
+             else:
+                 self.sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
+                 self.labcut = "(F/SIGF>{})".format(sigma_cutoff)
+         else:
+             self.sel = hkldata.df.index
+             self.labcut = ""
+         self.obs = hkldata.df["I" if use_int else "FP"].to_numpy(copy=True)
+         self.obs[~self.sel] = numpy.nan
+         self.calc = [x for x in fc_list]
+         self.s2mat = hkldata.ssq_mat()
+         self.s2 = 1. / hkldata.d_spacings().to_numpy()**2
+         self.adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=False)
+         self.twin_data = twin_data
+         if use_int:
+             self.sqrt_obs = numpy.sqrt(self.obs)
+
+     def get_solvent_scale(self, k_sol, b_sol, s2=None):
+         if s2 is None: s2 = self.s2
+         return k_sol * numpy.exp(-b_sol * s2 / 4)
+
+     def fc_and_mask_grad(self, x):
+         fc0 = self.calc[0]
+         if len(self.calc) == 2:
+             if self.twin_data:
+                 r = self.twin_data.scaling_fc_and_mask_grad(self.calc[1], x[-2], x[-1])
+                 return r[:,0], r[:,1], r[:,2]
+             else:
+                 fmask = self.calc[1]
+                 temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
+                 fbulk = x[-2] * temp_sol * fmask
+                 fc = fc0 + fbulk
+                 re_fmask_fcconj = (fmask * fc.conj()).real
+                 fc_abs = numpy.abs(fc)
+                 tmp = temp_sol / fc_abs * re_fmask_fcconj
+                 return fc_abs, tmp, -tmp * x[-2] * self.s2 / 4
+         else:
+             if self.twin_data:
+                 return numpy.sqrt(self.twin_data.i_calc_twin()), None, None
+             else:
+                 return numpy.abs(fc0), None, None
+
+     def scaled_fc(self, x):
+         fc = self.fc_and_mask_grad(x)[0]
+         nadp = self.adpdirs.shape[0]
+         B = numpy.dot(x[1:nadp+1], self.adpdirs)
+         kani = numpy.exp(numpy.dot(-B, self.s2mat))
+         return self.k_trans(x[0]) * kani * fc
+
+     def target(self, x):
+         y = self.scaled_fc(x)
+         if self.use_int:
+             diff = self.sqrt_obs - y
+             #y2 = y**2
+             #diff = self.obs - y2
+         else:
+             diff = self.obs - y
+
+         if self.func_type == "sq":
+             return numpy.nansum(diff**2)
+         elif self.func_type == "log_cosh":
+             return numpy.nansum(gemmi.log_cosh(diff))
+         else:
+             raise RuntimeError("bad func_type")
+
+     def grad(self, x):
+         g = numpy.zeros_like(x)
+         fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
+         nadp = self.adpdirs.shape[0]
+         B = numpy.dot(x[1:nadp+1], self.adpdirs)
+         kani = numpy.exp(numpy.dot(-B, self.s2mat))
+         k = self.k_trans(x[0])
+         y = k * kani * fc_abs
+         if self.use_int:
+             diff = self.sqrt_obs - y
+             diff_der = -1
+             #diff = self.obs - y**2
+             #diff_der = -2 * y
+         else:
+             diff = self.obs - y
+             diff_der = -1
+         if self.func_type == "sq":
+             dfdy = 2 * diff * diff_der
+         elif self.func_type == "log_cosh":
+             dfdy = numpy.tanh(diff) * diff_der
+         else:
+             raise RuntimeError("bad func_type")
+
+         dfdb = numpy.nansum(-self.s2mat * k * fc_abs * kani * dfdy, axis=1)
+         g[0] = numpy.nansum(kani * fc_abs * dfdy * self.k_trans_der(x[0]))
+         g[1:nadp+1] = numpy.dot(dfdb, self.adpdirs.T)
+         if len(self.calc) == 2:
+             g[-2] = numpy.nansum(k * kani * der_ksol * dfdy)
+             g[-1] = numpy.nansum(k * kani * der_bsol * dfdy)
+
+         return g
+
+     def calc_shift(self, x):
+         # TODO: sort out code duplication, if we use this.
+         g = numpy.zeros((len(self.obs), len(x)))
+         H = numpy.zeros((len(x), len(x)))
+         fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
+         nadp = self.adpdirs.shape[0]
+         B = numpy.dot(x[1:nadp+1], self.adpdirs)
+         kani = numpy.exp(numpy.dot(-B, self.s2mat))
+         k = self.k_trans(x[0])
+         y = k * kani * fc_abs
+         if self.use_int:
+             diff = self.sqrt_obs - y
+             diff_der = -1
+             diff_der2 = 0
+         else:
+             diff = self.obs - y
+             diff_der = -1.
+             diff_der2 = 0.
+
+         if self.func_type == "sq":
+             dfdy = 2 * diff * diff_der
+             dfdy2 = 2 * diff_der**2 + 2 * diff * diff_der2
+         elif self.func_type == "log_cosh":
+             dfdy = numpy.tanh(diff) * diff_der
+             #dfdy2 = 1 / numpy.cosh(diff)**2 * diff_der**2 + numpy.tanh(diff) * diff_der2 # problematic with large diff
+             #dfdy2 = numpy.where(diff==0, 1., numpy.abs(numpy.tanh(diff)) / gemmi.log_cosh(diff)) * diff_der**2 + numpy.tanh(diff) * diff_der2
+             dfdy2 = numpy.where(diff==0, 1., numpy.tanh(diff) / diff) * diff_der**2 + numpy.tanh(diff) * diff_der2
+         else:
+             raise RuntimeError("bad func_type")
+
+         dfdb = -self.s2mat * k * fc_abs * kani
+         g[:,0] = kani * fc_abs * self.k_trans_der(x[0])
+         g[:,1:nadp+1] = numpy.dot(dfdb.T, self.adpdirs.T)
+         if len(self.calc) == 2:
+             g[:,-2] = k * kani * der_ksol
+             g[:,-1] = k * kani * der_bsol
+
+         # no numpy.nandot..
+         g, dfdy, dfdy2 = g[self.sel, :], dfdy[self.sel], dfdy2[self.sel]
+         H = numpy.dot(g.T, g * dfdy2[:,None])
+         g = numpy.sum(dfdy[:,None] * g, axis=0)
+         dx = -numpy.dot(g, numpy.linalg.pinv(H))
+         return dx
+
+     def initial_kb(self):
+         fc_abs = self.fc_and_mask_grad([self.k_sol, self.b_sol])[0]
+         sel = self.obs > 0 # exclude nan as well
+         f1p, f2p, s2p = self.obs[sel], fc_abs[sel], self.s2[sel]
+         if self.use_int: f2p *= f2p
+         tmp = numpy.log(f2p) - numpy.log(f1p)
+         # g = [dT/dk, dT/db]
+         g = numpy.array([2 * numpy.sum(tmp), -numpy.sum(tmp*s2p)/2])
+         H = numpy.zeros((2,2))
+         H[0,0] = 2*len(f1p)
+         H[1,1] = numpy.sum(s2p**2/8)
+         H[0,1] = H[1,0] = -numpy.sum(s2p)/2
+         x = -numpy.dot(numpy.linalg.inv(H), g)
+         if self.use_int: x /= 2
+         k = numpy.exp(x[0])
+         b = x[1]
+         logger.writeln(" initial k,b = {:.2e} {:.2e}".format(k, b))
+         logger.writeln(" R{} = {:.4f}".format(self.labcut, utils.hkl.r_factor(f1p, f2p * k * numpy.exp(-b*self.s2[sel]/4))))
+         return k, b
+
+     def scale(self):
+         use_sol = len(self.calc) == 2
+         msg = "Scaling Fc to {} {} bulk solvent contribution".format("Io" if self.use_int else "Fo",
+                                                                      "with" if use_sol else "without")
+         logger.writeln(msg)
+         if self.k_overall is None or self.b_iso is None:
+             k, b = self.initial_kb()
+         else:
+             k, b = self.k_overall, self.b_iso
+         if self.b_aniso is None:
+             self.b_aniso = gemmi.SMat33d(b,b,b,0,0,0)
+         x0 = [self.k_trans_inv(k)]
+         bounds = [(0, None)]
+         x0.extend(numpy.dot(self.b_aniso.elements_pdb(), self.adpdirs.T))
+         bounds.extend([(None, None)]*(len(x0)-1))
+         if use_sol:
+             x0.extend([self.k_sol, self.b_sol])
+             bounds.extend([(1e-4, None), (10., 400.)])
+         if 0:
+             f0 = self.target(x0)
+             ader = self.grad(x0)
+             e = 1e-4
+             nder = []
+             for i in range(len(x0)):
+                 x = numpy.copy(x0)
+                 x[i] += e
+                 f1 = self.target(x)
+                 nder.append((f1 - f0) / e)
+             print("ADER NDER RATIO")
+             print(ader)
+             print(nder)
+             print(ader / nder)
+             quit()
+
+         t0 = time.time()
+         if 1:
+             x = x0
+             for i in range(40):
+                 x_ini = x.copy()
+                 f0 = self.target(x)
+                 dx = self.calc_shift(x)
+                 if numpy.max(numpy.abs(dx)) < 1e-6:
+                     break
+                 for s in (1, 0.5, 0.25):
+                     if 0:
+                         with open("debug.dat", "w") as ofs:
+                             for s in numpy.linspace(-2, 2, 100):
+                                 f1 = self.target(x+dx * s)
+                                 #print(dx, f0, f1, f0 - f1)
+                                 ofs.write("{:4e} {:4e}\n".format(s, f1))
+                     shift = dx * s
+                     x = x_ini + shift
+                     if x[0] < 0: x[0] = x0[0]
+                     if use_sol:
+                         if x[-1] < 10: x[-1] = 10
+                         elif x[-1] > 400: x[-1] = 400
+                         if x[-2] < 1e-4: x[-2] = 1e-4
+                     f1 = self.target(x)
+                     if f1 < f0: break
+                     #logger.writeln("cycle {} {} {} {} {} {}".format(i, f0, f1, s, shift, (f0 - f1) / f0))
+                 if 0 < (f0 - f1) / f0 < 1e-6:
+                     break
+             res_x = x
+             self.stats["fun"] = f1
+             self.stats["x"] = x
+         else:
+             res = scipy.optimize.minimize(fun=self.target, x0=x0, jac=self.grad, bounds=bounds)
+             #logger.writeln(str(res))
+             logger.writeln(" finished in {} iterations ({} evaluations)".format(res.nit, res.nfev))
+             res_x = res.x
+             self.stats["fun"] = res.fun
+             self.stats["x"] = res.x
+         logger.writeln(" time: {:.3f} sec".format(time.time() - t0))
+         self.k_overall = self.k_trans(res_x[0])
+         nadp = self.adpdirs.shape[0]
+         b_overall = gemmi.SMat33d(*numpy.dot(res_x[1:nadp+1], self.adpdirs))
+         self.b_iso = b_overall.trace() / 3
+         self.b_aniso = b_overall.added_kI(-self.b_iso) # subtract isotropic contribution
+
+         logger.writeln(" k_ov= {:.2e} B_iso= {:.2e} B_aniso= {}".format(self.k_overall, self.b_iso, self.b_aniso))
+         if use_sol:
+             self.k_sol = res_x[-2]
+             self.b_sol = res_x[-1]
+             logger.writeln(" k_sol= {:.2e} B_sol= {:.2e}".format(self.k_sol, self.b_sol))
+         calc = self.scaled_fc(res_x)
+         if self.use_int: calc *= calc
+         self.stats["cc"] = utils.hkl.correlation(self.obs, calc)
+         self.stats["r"] = utils.hkl.r_factor(self.obs, calc)
+         logger.writeln(" CC{} = {:.4f}".format(self.labcut, self.stats["cc"]))
+         logger.writeln(" R{} = {:.4f}".format(self.labcut, self.stats["r"]))
+ # class LsqScale
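+ # Usage sketch (mirrors bulk_solvent_and_lsq_scales() below): with hkldata
+ # holding FP/SIGFP and fc_list = [sum of atomic Fc, Fmask], one would do
+ #     scaling = LsqScale(func_type="log_cosh")
+ #     scaling.set_data(hkldata, fc_list, use_int=False, sigma_cutoff=0)
+ #     scaling.scale()
+ # after which k_overall, b_iso, b_aniso, k_sol and b_sol are available.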
+
+ def calc_abs_DFc(Ds, Fcs):
+     DFc = sum(Ds[i] * Fcs[i] for i in range(len(Ds)))
+     return numpy.abs(DFc)
+ # calc_abs_DFc()
+
+ #import line_profiler
+ #profile = line_profiler.LineProfiler()
+ #import atexit
+ #atexit.register(profile.print_stats)
+ #@profile
+ def mlf(df, fc_labs, Ds, S, k_ani, idxes):
+     Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
+     DFc = (Ds * Fcs).sum(axis=1)
+     ll = numpy.nansum(ext.ll_amp(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes],
+                                  k_ani[idxes], S * df.epsilon.to_numpy()[idxes],
+                                  numpy.abs(DFc), df.centric.to_numpy()[idxes]+1))
+     return numpy.nansum(ll)
+ # mlf()
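+ # mlf() sums ext.ll_amp over the selected reflections; judging from its use
+ # as a minimization target, ll_amp returns the per-reflection negative
+ # log-likelihood of FP given |DFc| and the variance S*epsilon, with the last
+ # argument (centric+1) switching between the acentric and centric cases.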
+
+ #@profile
+ def deriv_mlf_wrt_D_S(df, fc_labs, Ds, S, k_ani, idxes):
+     Fcs = [df[lab].to_numpy()[idxes] for lab in fc_labs]
+     r = ext.ll_amp_der1_DS(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes], k_ani[idxes], S,
+                            numpy.vstack(Fcs).T, Ds,
+                            df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])
+     g = numpy.zeros(len(fc_labs)+1)
+     g[:len(fc_labs)] = numpy.nansum(r[:,:len(fc_labs)], axis=0) # D
+     g[-1] = numpy.nansum(r[:,-1]) # S
+     return g
+ # deriv_mlf_wrt_D_S()
+
+ #@profile
+ def mlf_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
+     Fcs = [df[lab].to_numpy()[idxes] for lab in fc_labs]
+     r = ext.ll_amp_der1_DS(df.FP.to_numpy()[idxes], df.SIGFP.to_numpy()[idxes], k_ani[idxes], S,
+                            numpy.vstack(Fcs).T, Ds,
+                            df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])
+     g = numpy.nansum(r[:,-1])
+     H = numpy.nansum(r[:,-1]**2) # approximating expectation value of second derivative
+     return -g / H
+ # mlf_shift_S()
+
+ def mli(df, fc_labs, Ds, S, k_ani, idxes):
+     Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
+     DFc = (Ds * Fcs).sum(axis=1)
+     ll = integr.ll_int(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes],
+                        k_ani[idxes], S * df.epsilon.to_numpy()[idxes],
+                        numpy.abs(DFc), df.centric.to_numpy()[idxes]+1)
+     return numpy.nansum(ll)
+ # mli()
+
+ def deriv_mli_wrt_D_S(df, fc_labs, Ds, S, k_ani, idxes):
+     Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
+     r = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
+                               Fcs, Ds,
+                               df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])
+     g = numpy.zeros(len(fc_labs)+1)
+     g[:len(fc_labs)] = numpy.nansum(r[:,:len(fc_labs)], axis=0) # D
+     g[-1] = numpy.nansum(r[:,-1]) # S
+     return g
+ # deriv_mli_wrt_D_S()
+
+ def mli_shift_D(df, fc_labs, Ds, S, k_ani, idxes):
+     Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
+     r = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
+                               Fcs, Ds,
+                               df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])[:,:len(fc_labs)]
+     g = numpy.nansum(r, axis=0)# * trans.D_deriv(x[:len(fc_labs)]) # D
+     #tmp = numpy.hstack([r[:,:len(fc_labs)] #* trans.D_deriv(x[:len(fc_labs)]),
+     #                    r[:,-1,None] * trans.S_deriv(x[-1])])
+     H = numpy.nansum(numpy.matmul(r[:,:,None], r[:,None]), axis=0)
+     return -numpy.dot(g, numpy.linalg.pinv(H))
+ # mli_shift_D()
+
+ def mli_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
+     Fcs = numpy.vstack([df[lab].to_numpy()[idxes] for lab in fc_labs]).T
+     r = integr.ll_int_der1_DS(df.I.to_numpy()[idxes], df.SIGI.to_numpy()[idxes], k_ani[idxes], S,
+                               Fcs, Ds,
+                               df.centric.to_numpy()[idxes]+1, df.epsilon.to_numpy()[idxes])
+     g = numpy.nansum(r[:,-1])
+     H = numpy.nansum(r[:,-1]**2) # approximating expectation value of second derivative
+     return -g / H
+ # mli_shift_S()
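+ # The *_shift_* helpers implement one Fisher-scoring step: the Hessian is
+ # approximated by the expectation of the squared first derivatives (as the
+ # comments above note), so the update is dx = -g/H, with a pseudo-inverse
+ # in the multi-parameter D case.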
+
+ def mltwin_est_ftrue(twin_data, df, k_ani, idxes):
+     kani2_inv = 1 / k_ani**2
+     i_sigi = numpy.empty((2, len(df.index)))
+     i_sigi[:] = numpy.nan
+     i_sigi[0, idxes] = (df.I.to_numpy() * kani2_inv)[idxes]
+     i_sigi[1, idxes] = (df.SIGI.to_numpy() * kani2_inv)[idxes]
+     twin_data.est_f_true(i_sigi[0,:], i_sigi[1,:])
+ # mltwin_est_ftrue()
+
+ def mltwin(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+     twin_data.ml_sigma[i_bin] = S
+     twin_data.ml_scale[i_bin, :] = Ds
+     mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+     return twin_data.ll()
+ # mltwin()
+
+ def deriv_mltwin_wrt_D_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+     twin_data.ml_sigma[i_bin] = S
+     twin_data.ml_scale[i_bin, :] = Ds
+     mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+     r = twin_data.ll_der_D_S()
+     g = numpy.zeros(r.shape[1])
+     g[:-1] = numpy.nansum(r[:,:-1], axis=0) # D
+     g[-1] = numpy.nansum(r[:,-1]) # S
+     return g
+ # deriv_mltwin_wrt_D_S()
+
+ def mltwin_shift_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+     twin_data.ml_sigma[i_bin] = S
+     twin_data.ml_scale[i_bin, :] = Ds
+     mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+     r = twin_data.ll_der_D_S()
+     g = numpy.nansum(r[:,-1])
+     H = numpy.nansum(r[:,-1]**2) # approximating expectation value of second derivative
+     return -g / H
+ # mltwin_shift_S()
+
+ def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, use="all", smoothing="gauss"):
+     # theoretical values
+     cc_a = lambda cc: (numpy.pi/4*(1-cc**2)**2 * scipy.special.hyp2f1(3/2, 3/2, 1, cc**2) - numpy.pi/4) / (1-numpy.pi/4)
+     cc_c = lambda cc: 2/(numpy.pi-2) * (cc**2*numpy.sqrt(1-cc**2) + cc * numpy.arctan(cc/numpy.sqrt(1-cc**2)) + (1-cc**2)**(3/2)-1)
+     table_fsc = numpy.arange(0, 1, 1e-3)
+     table_cc = [cc_a(table_fsc), cc_c(table_fsc)]
+
+     for lab in D_labs: hkldata.binned_df[lab] = 1.
+     hkldata.binned_df["S"] = 1.
+
+     stats = hkldata.binned_df[["d_max", "d_min"]].copy()
+     for i, labi in enumerate(fc_labs):
+         stats["CC(FP,{})".format(labi)] = numpy.nan
+     for i, labi in enumerate(fc_labs):
+         for j in range(i+1, len(fc_labs)):
+             labj = fc_labs[j]
+             stats["CC({},{})".format(labi, labj)] = numpy.nan
+
+     # inverse sqrt of eps * c; c = 1 for acentrics and 2 for centrics
+     inv_sqrt_c_eps = 1. / numpy.sqrt(hkldata.df.epsilon.to_numpy() * (hkldata.df.centric.to_numpy() + 1))
+     for i_bin, _ in hkldata.binned():
+         # assume they are all acentric; only correct by c
+         if use == "all":
+             cidxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
+         else:
+             i = 1 if use == "work" else 2
+             cidxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
+         valid_sel = numpy.isfinite(hkldata.df.FP.to_numpy()[cidxes])
+         cidxes = cidxes[valid_sel]
+         factor = inv_sqrt_c_eps[cidxes]
+         k_ani = hkldata.df.k_aniso.to_numpy()[cidxes]
+         Fo = hkldata.df.FP.to_numpy()[cidxes] * factor / k_ani
+         mean_Fo2 = numpy.mean(Fo**2)
+         SigFo = hkldata.df.SIGFP.to_numpy()[cidxes] / k_ani
+         Fcs = [hkldata.df[lab].to_numpy()[cidxes] * factor for lab in fc_labs]
+         mean_Fk2 = numpy.array([numpy.mean(numpy.abs(fk)**2) for fk in Fcs])
+
+         # estimate D
+         cc_fo_fj = [numpy.corrcoef(numpy.abs(fj), Fo)[1,0] for fj in Fcs]
+         for i in range(len(fc_labs)): stats.loc[i_bin, "CC(FP,{})".format(fc_labs[i])] = cc_fo_fj[i]
+         mat = [[numpy.sqrt(numpy.mean(numpy.abs(fk)**2)/mean_Fo2) * numpy.real(numpy.corrcoef(fk, fj)[1,0])
+                 for fk in Fcs]
+                for fj in Fcs]
+         A = [[numpy.sqrt(numpy.mean(numpy.abs(fk)**2) * numpy.mean(numpy.abs(fj)**2))/mean_Fo2 * numpy.real(numpy.corrcoef(fk, fj)[1,0])
+               for fk in Fcs]
+              for fj in Fcs]
+         A = numpy.array([[numpy.real(numpy.corrcoef(fk, fj)[1,0]) for fk in Fcs] for fj in Fcs])
+         v = numpy.interp(cc_fo_fj, table_cc[0], table_fsc)
+
+         for i in range(len(fc_labs)):
+             labi = fc_labs[i]
+             for j in range(i+1, len(fc_labs)):
+                 labj = fc_labs[j]
+                 stats.loc[i_bin, "CC({},{})".format(labi, labj)] = numpy.real(numpy.corrcoef(Fcs[i], Fcs[j])[1,0])
+
+         # test all signs, fixing first Fc positive.
+         cc_max = -2
+         for v_test in itertools.product(*((x, -x) for x in v[1:])):
+             v_test = numpy.array((v[0],)+v_test)
+             Dj_test = numpy.dot(numpy.linalg.pinv(A), v_test) * numpy.sqrt(mean_Fo2 / mean_Fk2)
+             DFc_test = calc_abs_DFc(Dj_test, Fcs)
+             cc_test = numpy.corrcoef(Fo, numpy.abs(DFc_test))[1,0]
+             if cc_test > cc_max:
+                 cc_max = cc_test
+                 v_max = v_test
+                 DFc = DFc_test
+                 Dj = Dj_test
+
+         for lab, D in zip(D_labs, Dj):
+             hkldata.binned_df.loc[i_bin, lab] = D
+
+         # estimate S
+         mean_DFc2 = numpy.mean(DFc**2)
+         est_fsc_fo_fc = numpy.interp(numpy.corrcoef(Fo, DFc)[1,0], table_cc[0], table_fsc)
+         S = mean_Fo2 - 2 * numpy.sqrt(mean_Fo2 * mean_DFc2) * est_fsc_fo_fc + mean_DFc2 - numpy.mean(SigFo**2)
+         hkldata.binned_df.loc[i_bin, "S"] = S
+
+     logger.writeln("\nCC:")
+     logger.writeln(stats.to_string())
+     logger.writeln("\nEstimates:")
+     logger.writeln(hkldata.binned_df.to_string())
+     smooth_params(hkldata, D_labs, smoothing)
+ # determine_mlf_params_from_cc()
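+ # In determine_mlf_params_from_cc(), cc_a and cc_c are the theoretical
+ # relations between the correlation of amplitudes CC(|F1|,|F2|) and the
+ # correlation of the underlying structure factors, for acentric and centric
+ # reflections respectively; numpy.interp on the precomputed table inverts
+ # that relation to recover CC(F1,F2) from the observed amplitude CC.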
+
+ def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use, twin_data=None):
+     # Initial values
+     for lab in D_labs: hkldata.binned_df[lab] = 1.
+     hkldata.binned_df["S"] = 10000.
+     k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
+     lab_obs = "I" if use_int else "FP"
+     for i_bin, _ in hkldata.binned():
+         if use == "all":
+             idxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
+         else:
+             i = 1 if use == "work" else 2
+             idxes = numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
+         valid_sel = numpy.isfinite(hkldata.df.loc[idxes, lab_obs]) # as there is no nan-safe numpy.corrcoef
+         if numpy.sum(valid_sel) < 2:
+             continue
+         idxes = idxes[valid_sel]
+         if use_int:
+             Io = hkldata.df.I.to_numpy()[idxes]
+         else:
+             Io = hkldata.df.FP.to_numpy()[idxes]**2
+         Io /= k_ani[idxes]**2
+         if twin_data:
+             Ic = twin_data.i_calc_twin()[idxes]
+         else:
+             Ic = numpy.abs(hkldata.df.FC.to_numpy()[idxes])**2
+         mean_Io = numpy.mean(Io)
+         mean_Ic = numpy.mean(Ic)
+         cc = numpy.corrcoef(Io, Ic)[1,0]
+         if cc > 0 and mean_Io > 0:
+             D = numpy.sqrt(mean_Io / mean_Ic * cc)
+         else:
+             D = 0 # will be taken care of later
+         hkldata.binned_df.loc[i_bin, D_labs[0]] = D
+         if mean_Io > 0:
+             S = mean_Io - 2 * numpy.sqrt(mean_Io * mean_Ic * numpy.maximum(0, cc)) + mean_Ic
+         else:
+             S = numpy.std(Io) # similar initial value to french_wilson
+         hkldata.binned_df.loc[i_bin, "S"] = S
+
+     for D_lab in D_labs:
+         if hkldata.binned_df[D_lab].min() <= 0:
+             min_D = hkldata.binned_df[D_lab][hkldata.binned_df[D_lab] > 0].min() * 0.1
+             logger.writeln("WARNING: non-positive {} detected in initial estimates. Replacing it with {:.2e} (10% of the smallest positive value)".format(D_lab, min_D))
+             hkldata.binned_df[D_lab].where(hkldata.binned_df[D_lab] > 0, min_D, inplace=True) # arbitrary
+
+     if twin_data:
+         twin_data.ml_scale[:] = hkldata.binned_df.loc[:, D_labs]
+         twin_data.ml_sigma[:] = hkldata.binned_df.loc[:, "S"]
+
+     logger.writeln("Initial estimates:")
+     logger.writeln(hkldata.binned_df.to_string())
+ # initialize_ml_params()
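+ # The starting values above are moment-matching heuristics: D is taken as
+ # sqrt(cc * <Io>/<Ic>), and S as <Io> - 2*sqrt(<Io>*<Ic>*max(0, cc)) + <Ic>,
+ # which has the form of a mean squared residual between observed and scaled
+ # calculated intensities with the cross term damped by their correlation.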
+
+ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_selections,
+                         D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss",
+                         twin_data=None):
+     assert use in ("all", "work", "test")
+     assert smoothing in (None, "gauss")
+     logger.writeln("Estimating sigma-A parameters using {}..".format(("intensities" if use_int else "amplitudes") + (" (twin)" if twin_data else "")))
+     trans = VarTrans(D_trans, S_trans)
+     lab_obs = "I" if use_int else "FP"
+     def get_idxes(i_bin):
+         if use == "all":
+             return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin] for i in (1,2)])
+         else:
+             i = 1 if use == "work" else 2
+             return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
+
+     if not set(D_labs + ["S"]).issubset(hkldata.binned_df):
+         initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use, twin_data=twin_data)
+     for dlab, fclab in zip(D_labs, fc_labs):
+         hkldata.binned_df["Mn(|{}*{}|)".format(dlab, fclab)] = numpy.nan
+
+     refpar = "all"
+     for i_cyc in range(n_cycle):
+         t0 = time.time()
+         nfev_total = 0
+         k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
+         for i_bin, _ in hkldata.binned():
+             idxes = get_idxes(i_bin)
+             valid_sel = numpy.isfinite(hkldata.df.loc[idxes, lab_obs]) # as there is no nan-safe numpy.corrcoef
+             if numpy.sum(valid_sel) < 5:
+                 logger.writeln("WARNING: bin {} does not have sufficient reflections".format(i_bin))
+                 continue
+
+             def target(x):
+                 if refpar == "all":
+                     Ds = trans.D(x[:len(fc_labs)])
+                     S = trans.S(x[-1])
+                 elif refpar == "D":
+                     Ds = trans.D(x[:len(fc_labs)])
+                     S = hkldata.binned_df.loc[i_bin, "S"]
+                 else:
+                     Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
+                     S = trans.S(x[-1])
+
+                 if twin_data:
+                     return mltwin(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
+                 else:
+                     f = mli if use_int else mlf
+                     return f(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
+
+             def grad(x):
+                 if refpar == "all":
+                     Ds = trans.D(x[:len(fc_labs)])
+                     S = trans.S(x[-1])
+                     n_par = len(fc_labs)+1
+                 elif refpar == "D":
+                     Ds = trans.D(x[:len(fc_labs)])
+                     S = hkldata.binned_df.loc[i_bin, "S"]
+                     n_par = len(fc_labs)
+                 else:
+                     Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
+                     S = trans.S(x[-1])
+                     n_par = 1
+                 if twin_data:
+                     r = deriv_mltwin_wrt_D_S(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
+                 else:
+                     calc_deriv = deriv_mli_wrt_D_S if use_int else deriv_mlf_wrt_D_S
+                     r = calc_deriv(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
+                 g = numpy.zeros(n_par)
+                 if refpar in ("all", "D"):
+                     g[:len(fc_labs)] = r[:len(fc_labs)]
+                     g[:len(fc_labs)] *= trans.D_deriv(x[:len(fc_labs)])
+                 if refpar in ("all", "S"):
+                     g[-1] = r[-1]
+                     g[-1] *= trans.S_deriv(x[-1])
+                 return g
+
+             if 0:
+                 refpar = "S"
+                 x0 = trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])
+                 with open("s_line_{}.dat".format(i_bin), "w") as ofs:
+                     for sval in numpy.linspace(1, x0*2, 100):
+                         ofs.write("{:.4e} {:.10e} {:.10e}\n".format(sval,
+                                                                     target([sval]),
+                                                                     grad([sval])[0]))
+                 continue
+             #print("Bin", i_bin)
+             if 1: # refine D and S iteratively
+                 vals_last = None
+                 for ids in range(10):
+                     refpar = "D"
+                     x0 = numpy.array([trans.D_inv(hkldata.binned_df.loc[i_bin, lab]) for lab in D_labs])
+                     #print("MLTWIN=", target(x0))
+                     #quit()
+                     if 0:
+                         h = 1e-3
+                         f00 = target(x0)
+                         g00 = grad(x0)
+                         for ii in range(len(x0)):
+                             xx = x0.copy()
+                             xx[ii] += h
+                             f01 = target(xx)
+                             nder = (f01 - f00) / h
+                             logger.writeln(f"DEBUG_der_D bin_{i_bin} {ii} ad={g00[ii]} nd={nder} r={g00[ii]/nder}")
+                     vals_now = []
+                     if 0:
+                         f0 = target(x0)
+                         nfev_total += 1
+                         shift = mli_shift_D(hkldata.df, fc_labs, trans.D(x0), hkldata.binned_df.loc[i_bin, "S"], k_ani, idxes)
+                         shift /= trans.D_deriv(x0)
+                         #if abs(shift) < 1e-3: break
+                         for itry in range(10):
+                             x1 = x0 + shift
+                             if (D_trans and any(x1 < -3)) or (not D_trans and any(x1 < 5e-2)):
+                                 #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift, "BAD")
+                                 shift /= 2
+                                 continue
+                             f1 = target(x1)
+                             nfev_total += 1
+                             if f1 > f0:
+                                 shift /= 2
+                                 continue
+                             else: # good
+                                 for i, lab in enumerate(D_labs):
+                                     hkldata.binned_df.loc[i_bin, lab] = trans.D(x1[i])
+                                     vals_now.append(hkldata.binned_df.loc[i_bin, lab])
+                                 break
+                         else:
+                             break
+                     else:
+                         #print(mli_shift_D(hkldata.df, fc_labs, trans.D(x0), hkldata.binned_df.S[i_bin], k_ani, idxes))
+                         res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
+                                                       bounds=((-5 if D_trans else 1e-5, None),)*len(x0))
+                         nfev_total += res.nfev
+                         #print(i_bin, "mini cycle", ids, refpar)
+                         #print(res)
+                         for i, lab in enumerate(D_labs):
+                             hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
+                             vals_now.append(hkldata.binned_df.loc[i_bin, lab])
+                         if twin_data:
+                             twin_data.ml_scale[i_bin, :] = trans.D(res.x)
+                     refpar = "S"
+                     if 1:
+                         for cyc_s in range(1):
+                             x0 = trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])
+                             if 0:
+                                 h = 1e-1
+                                 f00 = target([x0])
+                                 g00 = grad([x0])
+                                 xx = x0 + h
+                                 f01 = target([xx])
+                                 nder = (f01 - f00) / h
+                                 logger.writeln(f"DEBUG_der_S bin_{i_bin} ad={g00} nd={nder} r={g00/nder}")
+
+                             f0 = target([x0])
+                             Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
+                             nfev_total += 1
+                             if twin_data:
+                                 shift = mltwin_shift_S(hkldata.df, twin_data, Ds, trans.S(x0), k_ani, idxes, i_bin)
+                             else:
+                                 calc_shift_S = mli_shift_S if use_int else mlf_shift_S
+                                 shift = calc_shift_S(hkldata.df, fc_labs, Ds, trans.S(x0), k_ani, idxes)
+                             shift /= trans.S_deriv(x0)
+                             if abs(shift) < 1e-3: break
+                             for itry in range(10):
+                                 x1 = x0 + shift
+                                 if (S_trans and x1 < -3) or (not S_trans and x1 < 5e-2):
+                                     #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift, "BAD")
+                                     shift /= 2
+                                     continue
+                                 f1 = target([x1])
+                                 nfev_total += 1
+                                 if f1 > f0:
+                                     shift /= 2
+                                     continue
+                                 else: # good
+                                     #print(i_bin, cyc_s, trans.S(x0), trans.S(x1), shift)
+                                     hkldata.binned_df.loc[i_bin, "S"] = trans.S(x1)
+                                     break
+                             else:
+                                 #print("all bad")
+                                 break
+                             if twin_data:
+                                 twin_data.ml_sigma[i_bin] = hkldata.binned_df.loc[i_bin, "S"]
+                     else:
+                         # somehow this does not work well.
+                         x0 = [trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])]
+                         res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
+                                                       bounds=((-3 if S_trans else 5e-2, None),))
+                         nfev_total += res.nfev
+                         #print(i_bin, "mini cycle", ids, refpar)
+                         #print(res)
+                         hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
+                         if twin_data:
+                             twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
+                     vals_now.append(hkldata.binned_df.loc[i_bin, "S"])
+                     vals_now = numpy.array(vals_now)
+                     if vals_last is not None and numpy.all(numpy.abs((vals_last - vals_now) / vals_now) < 1e-2):
+                         #logger.writeln("converged in mini cycle {}".format(ids+1))
+                         break
+                     vals_last = vals_now
+             else:
+                 x0 = [trans.D_inv(hkldata.binned_df.loc[i_bin, lab]) for lab in D_labs] + [trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])]
+                 res = scipy.optimize.minimize(fun=target, x0=x0, jac=grad,
+                                               bounds=((-5 if D_trans else 1e-5, None), )*len(D_labs) + ((-3 if S_trans else 5e-2, None),))
+                 nfev_total += res.nfev
+                 #print(i_bin)
+                 #print(res)
+                 for i, lab in enumerate(D_labs):
+                     hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
+                 hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
+                 if twin_data:
+                     twin_data.ml_scale[i_bin, :] = trans.D(res.x[:-1])
+                     twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
+
+         if twin_data:
+             dfc = numpy.abs(twin_data.f_calc) * twin_data.ml_scale_array()
+             for i_bin, idxes in hkldata.binned():
+                 dfc_bin = dfc[numpy.asarray(twin_data.bin)==i_bin,:]
+                 mean_dfc = numpy.nanmean(dfc_bin, axis=0)
+                 for i, (dlab, fclab) in enumerate(zip(D_labs, fc_labs)):
+                     hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc[i]
+         else:
+             for i_bin, idxes in hkldata.binned():
+                 for dlab, fclab in zip(D_labs, fc_labs):
+                     mean_dfc = numpy.nanmean(numpy.abs(hkldata.binned_df[dlab][i_bin] * hkldata.df[fclab][idxes]))
+                     hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc
+
+         logger.writeln("Refined estimates:")
+         logger.writeln(hkldata.binned_df.to_string())
+         #numpy.testing.assert_allclose(hkldata.binned_df.S, twin_data.ml_sigma)
+         #numpy.testing.assert_allclose(hkldata.binned_df[D_labs], twin_data.ml_scale)
+         logger.writeln("time: {:.1f} sec ({} evaluations)".format(time.time() - t0, nfev_total))
+
+         if not use_int or twin_data:
+             break # did not implement MLF B_aniso optimization
+
+         # Refine b_aniso
+         adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=True)
+         SMattolist = lambda B: [B.u11, B.u22, B.u33, B.u12, B.u13, B.u23]
+
+         def target_ani(x):
+             b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
+             k_ani = hkldata.debye_waller_factors(b_cart=b)
+             ret = 0.
+             for i_bin, idxes in hkldata.binned():
+                 Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
+                 ret += mli(hkldata.df, fc_labs, Ds, hkldata.binned_df.loc[i_bin, "S"], k_ani, idxes)
+             return ret
+         def grad_ani(x):
+             b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
+             k_ani = hkldata.debye_waller_factors(b_cart=b)
+             S2mat = hkldata.ssq_mat() # ssqmat
+             g = numpy.zeros(6)
+             for i_bin, idxes in hkldata.binned():
+                 r = integr.ll_int_der1_ani(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes],
+                                            k_ani[idxes], hkldata.binned_df.loc[i_bin, "S"],
+                                            hkldata.df[fc_labs].to_numpy()[idxes], hkldata.binned_df.loc[i_bin, D_labs],
+                                            hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes])
+                 S2 = S2mat[:,idxes]
+                 g += -numpy.nansum(S2 * r[:,0], axis=1) # k_ani is already multiplied in r
+             return numpy.dot(g, adpdirs.T)
+         def shift_ani(x):
+             b = gemmi.SMat33d(*numpy.dot(x, adpdirs))
+             k_ani = hkldata.debye_waller_factors(b_cart=b)
+             S2mat = hkldata.ssq_mat() # ssqmat
+             g = numpy.zeros(6)
+             H = numpy.zeros((6, 6))
+             for i_bin, idxes in hkldata.binned():
+                 r = integr.ll_int_der1_ani(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes],
+                                            k_ani[idxes], hkldata.binned_df.loc[i_bin, "S"],
+                                            hkldata.df[fc_labs].to_numpy()[idxes], list(hkldata.binned_df.loc[i_bin, D_labs]),
+                                            hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes])
+                 S2 = S2mat[:,idxes]
+                 g += -numpy.nansum(S2 * r[:,0], axis=1) # k_ani is already multiplied in r
+                 H += numpy.nansum(numpy.matmul(S2[None,:].T, S2.T[:,None]) * (r[:,0]**2)[:,None,None], axis=0)
+
+             g, H = numpy.dot(g, adpdirs.T), numpy.dot(adpdirs, numpy.dot(H, adpdirs.T))
+             return -numpy.dot(g, numpy.linalg.pinv(H))
+
+         logger.writeln("Refining B_aniso. Current = {}".format(b_aniso))
+         if 0:
+             x0 = numpy.dot(SMattolist(b_aniso), numpy.linalg.pinv(adpdirs))
+             res = scipy.optimize.minimize(fun=target_ani, x0=x0, jac=grad_ani)
+             print(res)
+             b_aniso = gemmi.SMat33d(*numpy.dot(res.x, adpdirs))
+             f1 = res.fun
+         else:
+             B_converged = False
+             for j in range(10):
+                 x = numpy.dot(SMattolist(b_aniso), numpy.linalg.pinv(adpdirs))
+                 f0 = target_ani(x)
+                 shift = shift_ani(x)
+                 for i in range(3):
+                     ss = shift / 2**i
+                     f1 = target_ani(x + ss)
+                     #logger.writeln("{:2d} f0 = {:.3e} shift = {} df = {:.3e}".format(j, f0, ss, f1 - f0))
+                     if f1 < f0:
+                         b_aniso = gemmi.SMat33d(*numpy.dot(x+ss, adpdirs))
+                         if numpy.max(numpy.abs(ss)) < 1e-4: B_converged = True
+                         break
+                 else:
+                     B_converged = True
+                 if B_converged: break
+
+         logger.writeln("Refined B_aniso = {}".format(b_aniso))
+         logger.writeln("cycle {} f= {}".format(i_cyc, f1))
+
+     smooth_params(hkldata, D_labs, smoothing)
+     return b_aniso
+ # determine_ml_params()
+
+ def smooth_params(hkldata, D_labs, smoothing): # XXX twin_data
+     if smoothing is None or len(hkldata.binned()) < 2:
+         for i, lab in enumerate(D_labs + ["S"]):
+             hkldata.df[lab] = hkldata.binned_data_as_array(lab)
+
+     elif smoothing == "gauss":
+         bin_centers = (0.5 / hkldata.binned_df[["d_min", "d_max"]]**2).sum(axis=1).to_numpy()
+         vals = ext.smooth_gauss(bin_centers,
+                                 hkldata.binned_df[D_labs + ["S"]].to_numpy(),
+                                 1./hkldata.df.d.to_numpy()**2,
+                                 100, # min(n_ref?)
+                                 (bin_centers[1] - bin_centers[0]))
+         for i, lab in enumerate(D_labs + ["S"]):
+             hkldata.df[lab] = vals[:, i]
+         # Update smoothed averages; this affects the next refinement.
+         # TODO: update Mn(|Dj*FCj|) as well.
+         #for i_bin, idxes in hkldata.binned():
+         #    for lab in D_labs + ["S"]:
+         #        hkldata.binned_df.loc[i_bin, lab] = numpy.mean(hkldata.df[lab].to_numpy()[idxes])
+     else:
+         raise RuntimeError("unknown smoothing method: {}".format(smoothing))
+ # smooth_params()
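+ # smooth_params() maps the per-bin D/S estimates onto individual reflections:
+ # either as a step function per bin, or (the "gauss" option) by what appears
+ # to be a Gaussian-kernel interpolation in s^2 = 1/d^2, with the bin centres
+ # as nodes and the bin spacing as the kernel width.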
+
+ def expected_F_from_int(Io, sigIo, k_ani, DFc, eps, c, S):
+     k_num = numpy.repeat(0.5 if c == 0 else 0., Io.size) # 0.5 if acentric
+     k_den = k_num - 0.5
+     if numpy.isscalar(c): c = numpy.repeat(c, Io.size)
+     to = Io / sigIo - sigIo / (c+1) / k_ani**2 / S / eps
+     tf = k_ani * numpy.abs(DFc) / numpy.sqrt(sigIo)
+     sig1 = k_ani**2 * S * eps / sigIo
+     f = ext.integ_J_ratio(k_num, k_den, True, to, tf, sig1, c+1, integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
+     f *= numpy.sqrt(sigIo) / k_ani
+     m_proxy = ext.integ_J_ratio(k_num, k_num, True, to, tf, sig1, c+1, integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
+     return f, m_proxy
+ # expected_F_from_int()
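+ # expected_F_from_int() evaluates moments of the amplitude under the
+ # intensity-based likelihood: f is the expected amplitude (a ratio of two
+ # integrals computed in sigIo-normalized variables, then rescaled back), and
+ # m_proxy feeds the "FOM proxy, |<F>| / <|F|>" column that is filled in
+ # calculate_maps_int() below.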
+
+ def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections, use="all"):
+     nmodels = len(fc_labs)
+     hkldata.df["FWT"] = 0j * numpy.nan
+     hkldata.df["DELFWT"] = 0j * numpy.nan
+     hkldata.df["FOM"] = numpy.nan # FOM proxy, |<F>| / <|F|>
+     has_ano = "I(+)" in hkldata.df and "I(-)" in hkldata.df
+     if has_ano:
+         hkldata.df["FAN"] = 0j * numpy.nan
+         ano_data = hkldata.df[["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]].to_numpy()
+     Io = hkldata.df.I.to_numpy()
+     sigIo = hkldata.df.SIGI.to_numpy()
+     k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
+     eps = hkldata.df.epsilon.to_numpy()
+     Ds = numpy.vstack([hkldata.df[lab].to_numpy() for lab in D_labs]).T
+     Fcs = numpy.vstack([hkldata.df[lab].to_numpy() for lab in fc_labs]).T
+     DFc = (Ds * Fcs).sum(axis=1)
+     hkldata.df["DFC"] = DFc
+     for i_bin, idxes in hkldata.binned():
+         for c, work, test in centric_and_selections[i_bin]:
+             cidxes = numpy.concatenate([work, test])
+             S = hkldata.df["S"].to_numpy()[cidxes]
+             f, m_proxy = expected_F_from_int(Io[cidxes], sigIo[cidxes], k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
+             exp_ip = numpy.exp(numpy.angle(DFc[cidxes])*1j)
+             if c == 0:
+                 hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
+             else:
+                 hkldata.df.loc[cidxes, "FWT"] = f * exp_ip
+             hkldata.df.loc[cidxes, "DELFWT"] = f * exp_ip - DFc[cidxes]
+             hkldata.df.loc[cidxes, "FOM"] = m_proxy
+             if has_ano:
+                 f_p, _ = expected_F_from_int(ano_data[cidxes,0], ano_data[cidxes,1],
+                                              k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
+                 f_m, _ = expected_F_from_int(ano_data[cidxes,2], ano_data[cidxes,3],
+                                              k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
+                 hkldata.df.loc[cidxes, "FAN"] = (f_p - f_m) * exp_ip / 2j
+             # remove reflections that should be hidden
+             if use != "all":
+                 # usually use == "work"
+                 tohide = test if use == "work" else work
+                 hkldata.df.loc[tohide, "FWT"] = 0j * numpy.nan
+                 hkldata.df.loc[tohide, "DELFWT"] = 0j * numpy.nan
+             fill_sel = numpy.isnan(hkldata.df["FWT"][cidxes].to_numpy())
+             hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]
+ # calculate_maps_int()
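+ # The map coefficients follow the usual ML conventions, with the expected
+ # amplitude f playing the role of m*|Fo|: acentrics get FWT =
+ # 2*f*exp(i*phi_c) - DFc, centrics get FWT = f*exp(i*phi_c), and DELFWT =
+ # f*exp(i*phi_c) - DFc; reflections without an estimate are filled with DFc
+ # so that the synthesis is complete.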
+
+ def calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_and_selections, use="all"):
+     k_ani2_inv = 1 / hkldata.debye_waller_factors(b_cart=b_aniso)**2
+     Io = hkldata.df.I.to_numpy(copy=True) * k_ani2_inv
+     sigIo = hkldata.df.SIGI.to_numpy(copy=True) * k_ani2_inv
+     # Mask Io
+     for i_bin, idxes in hkldata.binned():
+         for c, work, test in centric_and_selections[i_bin]:
+             if use != "all":
+                 tohide = test if use == "work" else work
+                 Io[tohide] = numpy.nan
+
+     twin_data.est_f_true(Io, sigIo)
+     Ds = twin_data.ml_scale_array()
+     DFc = (twin_data.f_calc * Ds).sum(axis=1)
+     exp_ip = numpy.exp(numpy.angle(DFc)*1j)
+     Ft = numpy.asarray(twin_data.f_true_max)
+     m = twin_data.calc_fom()
+     Fexp = twin_data.expected_F(Io, sigIo)
+     if 1:
+         fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
+                           2 * m * Ft * exp_ip - DFc,
+                           m * Ft * exp_ip)
+         delfwt = m * Ft * exp_ip - DFc
+     else: # based on "more accurate" evaluation of <m|F|>
+         fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
+                           2 * Fexp * exp_ip - DFc,
+                           m * Fexp * exp_ip)
+         delfwt = Fexp * exp_ip - DFc
+
+     sel = numpy.isnan(fwt)
+     fwt[sel] = DFc[sel]
+
+     hkldata2 = utils.hkl.HklData(hkldata.cell, hkldata.sg,
+                                  utils.hkl.df_from_twin_data(twin_data, fc_labs))
+     hkldata2.df["FWT"] = fwt
+     hkldata2.df["DELFWT"] = delfwt
+     hkldata2.df["FOM"] = m
+     hkldata2.df["F_est"] = Ft
+     hkldata2.df["F_exp"] = Fexp
+     hkldata2.df["FC"] = twin_data.f_calc.sum(axis=1)
+     hkldata2.df["DFC"] = DFc
+     hkldata2.df[D_labs] = Ds
+     hkldata2.df["S"] = twin_data.ml_sigma_array()
+     return hkldata2
+ # calculate_maps_twin()
+
+ def merge_models(sts): # simply merge models; chain IDs etc. are not fixed
+     st2 = sts[0].clone()
+     del st2[:]
+     model = gemmi.Model(1)
+     for st in sts:
+         for m in st:
+             for c in m:
+                 model.add_chain(c)
+     st2.add_model(model)
+     return st2
+ # merge_models()
+
+ def decide_mtz_labels(mtz, find_free=True, require=None):
+     # F is preferred for now by default
+     obs_types = ("F", "J", "G", "K")
+     if require:
+         assert set(require).issubset(obs_types)
+     else:
+         require = obs_types
+     dlabs = utils.hkl.mtz_find_data_columns(mtz)
+     logger.writeln("Finding possible options from MTZ:")
+     for typ in dlabs:
+         for labs in dlabs[typ]:
+             logger.writeln(" --labin '{}'".format(",".join(labs)))
+     for typ in require:
+         if dlabs[typ]:
+             labin = dlabs[typ][0]
+             break
+     else:
+         raise RuntimeError("Data not found in MTZ file")
+     if find_free:
+         flabs = utils.hkl.mtz_find_free_columns(mtz)
+         if flabs:
+             labin += [flabs[0]]
+     logger.writeln("MTZ columns automatically selected: {}".format(labin))
+     return labin
+ # decide_mtz_labels()
+
+ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=None,
+                   n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False,
+                   allow_unusual_occupancies=False, space_group=None):
+     if labin: assert 1 < len(labin) < 6
+     assert use in ("all", "work", "test")
+     assert n_bins or n_per_bin # if n_bins is not set, n_per_bin must be given
+
+     if len(xyzins) > 0 and type(xyzins[0]) is gemmi.Structure:
+         sts = xyzins
+     else:
+         sts = []
+
+     if type(hklin) is gemmi.Mtz or utils.fileio.is_mmhkl_file(hklin):
+         if type(hklin) is gemmi.Mtz:
+             mtz = hklin
+         else:
+             mtz = utils.fileio.read_mmhkl(hklin, cif_index=cif_index)
+         if not sts:
+             sts = [utils.fileio.read_structure(f) for f in xyzins]
+     else:
+         assert len(xyzins) == 1
+         assert not sts
+         st, mtz = utils.fileio.read_small_molecule_files([hklin, xyzins[0]])
+         sts = [st]
+
+     for st in sts:
+         utils.model.check_occupancies(st, raise_error=not allow_unusual_occupancies)
+
+     if not labin:
+         labin = decide_mtz_labels(mtz)
+     col_types = {x.label:x.type for x in mtz.columns}
+     if labin[0] not in col_types:
+         raise RuntimeError("MTZ column not found: {}".format(labin[0]))
+     labs_and_types = {"F": ("amplitude", ["FP","SIGFP"], ["F", "Q"]),
+                       "J": ("intensity", ["I","SIGI"], ["J", "Q"]),
+                       "G": ("anomalous amplitude", ["F(+)","SIGF(+)", "F(-)", "SIGF(-)"], ["G", "L", "G", "L"]),
+                       "K": ("anomalous intensity", ["I(+)","SIGI(+)", "I(-)", "SIGI(-)"], ["K", "M", "K", "M"])}
+     if col_types[labin[0]] not in labs_and_types:
+         raise RuntimeError("MTZ column {} is neither amplitude nor intensity".format(labin[0]))
+     name, newlabels, require_types = labs_and_types[col_types[labin[0]]]
+     logger.writeln("Observation type: {}".format(name))
+     if len(newlabels) < len(labin): newlabels.append("FREE")
+     hkldata = utils.hkl.hkldata_from_mtz(mtz, labin, newlabels=newlabels, require_types=require_types)
+     if newlabels[0] == "F(+)":
+         hkldata.merge_anomalous(newlabels[:4], ["FP", "SIGFP"])
+         newlabels = ["FP", "SIGFP"] + newlabels[4:]
+     elif newlabels[0] == "I(+)":
+         hkldata.merge_anomalous(newlabels[:4], ["I", "SIGI"])
+         newlabels = ["I", "SIGI"] + newlabels[4:]
+
+     if hkldata.df.empty:
+         raise RuntimeError("No data in hkl data")
+
+     if space_group is None:
+         sg_use = None
+     else:
+         sg_use = gemmi.SpaceGroup(space_group)
+         logger.writeln(f"Space group overridden by user. Using {sg_use.xhm()}")
+
+     if sts:
+         assert source in ["electron", "xray", "neutron"]
+         for st in sts:
+             if st[0].count_atom_sites() == 0:
+                 raise RuntimeError("No atoms in the model")
+         if not hkldata.cell.approx(sts[0].cell, 1e-3):
+             logger.writeln("Warning: unit cell mismatch between model and reflection data")
+             logger.writeln(" using unit cell from mtz")
+
+         for st in sts: st.cell = hkldata.cell # mtz cell is used in any case
+
+         sg_st = sts[0].find_spacegroup() # may be None
+         if sg_use is None:
+             sg_use = hkldata.sg
+             if hkldata.sg != sg_st:
+                 if st.cell.is_crystal() and sg_st and sg_st.laue_str() != hkldata.sg.laue_str():
+                     raise RuntimeError("Crystal symmetry mismatch between model and data")
+                 logger.writeln("Warning: space group mismatch between model and mtz")
+                 if sg_st and sg_st.laue_str() == hkldata.sg.laue_str():
+                     logger.writeln(" using space group from model")
+                     sg_use = sg_st
+                 else:
+                     logger.writeln(" using space group from mtz")
+                 logger.writeln("")
+
+         for st in sts:
+             st.spacegroup_hm = sg_use.xhm()
+             st.setup_cell_images()
+
+         if not keep_charges:
+             utils.model.remove_charge(sts)
+         utils.model.check_atomsf(sts, source)
+
+     if sg_use is not None:
+         hkldata.sg = sg_use
+     if newlabels[0] == "FP":
+         hkldata.remove_nonpositive(newlabels[0])
+         hkldata.remove_nonpositive(newlabels[1])
+     hkldata.switch_to_asu()
+     hkldata.remove_systematic_absences()
+     #hkldata.df = hkldata.df.astype({name: 'float64' for name in ["I","SIGI","FP","SIGFP"] if name in hkldata.df})
+     d_min_data = hkldata.d_min_max(newlabels)[0]
+     if d_min is None and hkldata.d_min_max()[0] != d_min_data:
+         d_min = d_min_data
+         logger.writeln(f"Changing resolution to {d_min:.3f} A")
+     if (d_min, d_max).count(None) != 2:
+         hkldata = hkldata.copy(d_min=d_min, d_max=d_max)
+         if hkldata.df.empty:
+             raise RuntimeError("No data left in hkl data")
+
+     hkldata.complete()
+     hkldata.sort_by_resolution()
+     hkldata.calc_epsilon()
+     hkldata.calc_centric()
+
+     if "FREE" in hkldata.df and free is None:
+         free = hkldata.guess_free_number(newlabels[0])
+
+     if n_bins is None:
+         sel = hkldata.df[newlabels[0]].notna()
+         if use == "work":
+             sel &= hkldata.df.FREE != free
+         elif use == "test":
+             sel &= hkldata.df.FREE == free
+         s_array = 1/hkldata.d_spacings()[sel]
+         if len(s_array) == 0:
+             raise RuntimeError("no reflections in {} set".format(use))
+         n_bins = utils.hkl.decide_n_bins(n_per_bin, s_array, max_bins=max_bins)
+         logger.writeln("n_per_bin={} requested for {}. n_bins set to {}".format(n_per_bin, use, n_bins))
+
+     hkldata.setup_binning(n_bins=n_bins)
+     logger.writeln("Data completeness: {:.2f}%".format(hkldata.completeness()*100.))
+
+     fc_labs = ["FC{}".format(i) for i, _ in enumerate(sts)]
+
+     # Create a centric selection table for faster lookup
+     centric_and_selections = {}
+     stats = hkldata.binned_df.copy()
+     stats["n_all"] = 0
+     stats["n_obs"] = 0
+     stats[newlabels[0]] = numpy.nan
+     snr = "I/sigma" if newlabels[0] == "I" else "F/sigma"
+     stats[snr] = numpy.nan
+     if newlabels[0] == "I":
+         stats["Mn(I)/Std(I)"] = numpy.nan
+     if "FREE" in hkldata.df:
+         stats["n_work"] = 0
+         stats["n_test"] = 0
+
+     for i_bin, idxes in hkldata.binned():
+         centric_and_selections[i_bin] = []
+         n_obs = 0
+         n_work, n_test = 0, 0
+         for c, g2 in hkldata.df.loc[idxes].groupby("centric", sort=False):
+             valid_sel = numpy.isfinite(g2[newlabels[0]])
+             if "FREE" in g2:
+                 test_sel = (g2.FREE == free).fillna(False)
+                 test = g2.index[test_sel]
+                 work = g2.index[~test_sel]
+                 n_work += (valid_sel & ~test_sel).sum()
+                 n_test += (valid_sel & test_sel).sum()
+             else:
+                 work = g2.index
+                 test = type(work)([], dtype=work.dtype)
+             centric_and_selections[i_bin].append((c, work, test))
+             n_obs += numpy.sum(valid_sel)
+
+         stats.loc[i_bin, "n_obs"] = n_obs
+         stats.loc[i_bin, "n_all"] = len(idxes)
+         obs = hkldata.df[newlabels[0]].to_numpy()[idxes]
+         sigma = hkldata.df[newlabels[1]].to_numpy()[idxes]
+         if n_obs > 0:
+             stats.loc[i_bin, snr] = numpy.nanmean(obs / sigma)
+             mean_obs = numpy.nanmean(obs)
+             stats.loc[i_bin, newlabels[0]] = mean_obs
+             if newlabels[0] == "I":
+                 stats.loc[i_bin, "Mn(I)/Std(I)"] = mean_obs / numpy.nanstd(obs)
+         if "FREE" in hkldata.df:
+             stats.loc[i_bin, "n_work"] = n_work
+             stats.loc[i_bin, "n_test"] = n_test
+
+     stats["completeness"] = stats["n_obs"] / stats["n_all"] * 100
+     logger.writeln(stats.to_string())
+     return hkldata, sts, fc_labs, centric_and_selections, free
+ # process_input()
1349
+
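Note: calc_centric() and calc_epsilon() above attach per-reflection centric flags and epsilon (multiplicity) factors to the reflection table; these drive the centric/acentric split used throughout the likelihood code below. For orientation, the same quantities can be obtained directly from gemmi's space-group operations. A minimal illustrative sketch, not servalcat's code path:

    import gemmi
    import numpy

    # Centric flags and epsilon factors for a few Miller indices,
    # analogous to the "centric" and "epsilon" columns of hkldata.df.
    ops = gemmi.SpaceGroup("P 21 21 21").operations()
    hkl = numpy.array([[1, 2, 3], [0, 0, 2], [1, 1, 0]], dtype=numpy.int32)
    print(ops.centric_flag_array(hkl))    # True where the reflection is centric
    print(ops.epsilon_factor_array(hkl))  # epsilon factor per reflection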
+ def update_fc(st_list, fc_labs, d_min, monlib, source, mott_bethe, hkldata=None, twin_data=None):
+     #assert (hkldata, twin_data).count(None) == 1
+     # hkldata not updated when twin_data is given
+     for i, st in enumerate(st_list):
+         if st.ncs:
+             st = st.clone()
+             st.expand_ncs(gemmi.HowToNameCopiedChain.Dup, merge_dist=0)
+         if twin_data:
+             hkl = twin_data.asu
+         else:
+             hkl = hkldata.miller_array()
+         fc = utils.model.calc_fc_fft(st, d_min - 1e-6,
+                                      monlib=monlib,
+                                      source=source,
+                                      mott_bethe=mott_bethe,
+                                      miller_array=hkl)
+         if twin_data:
+             twin_data.f_calc[:, i] = fc
+         else:
+             hkldata.df[fc_labs[i]] = fc
+     if not twin_data:
+         hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
+ # update_fc()
+
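Note: utils.model.calc_fc_fft computes the model structure factors on an FFT grid, applying the Mott-Bethe conversion when the source is electron. For a spot check of a few reflections one can use gemmi's direct-summation calculator instead; this sketch is independent of servalcat's code path and the input file name is a placeholder:

    import gemmi

    # Direct-summation Fc for a single reflection (X-ray case).
    st = gemmi.read_structure("model.pdb")  # placeholder file
    calc = gemmi.StructureFactorCalculatorX(st.cell)
    fc = calc.calculate_sf_from_model(st[0], (1, 2, 3))
    print(abs(fc))  # |Fc| for hkl = (1, 2, 3)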
+ def calc_Fmask(st, d_min, miller_array):
+     logger.writeln("Calculating solvent contribution..")
+     grid = gemmi.FloatGrid()
+     grid.setup_from(st, spacing=min(0.6, (d_min - 1e-6) / 2 - 1e-9))
+     masker = gemmi.SolventMasker(gemmi.AtomicRadiiSet.Refmac)
+     masker.put_mask_on_float_grid(grid, st[0])
+     fmask_gr = gemmi.transform_map_to_f_phi(grid)
+     Fmask = fmask_gr.get_value_by_hkl(miller_array)
+     return Fmask
+ # calc_Fmask()
+
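Note: the mask coefficients returned by calc_Fmask enter the flat bulk-solvent model. Assuming get_solvent_scale in LsqScale follows the usual convention, the solvent term is Fbulk = k_sol * exp(-B_sol * s^2 / 4) * Fmask with s^2 = 1/d^2, which is how Fbulk is formed in bulk_solvent_and_lsq_scales below. A small numeric sketch of that assumed scale factor:

    import numpy

    # Assumed flat bulk-solvent scale (standard convention; the actual
    # get_solvent_scale may differ in detail).
    def solvent_scale(k_sol, b_sol, s2):
        return k_sol * numpy.exp(-b_sol * s2 / 4)

    d = numpy.array([2.0, 3.0, 4.0])            # resolution in Angstrom
    print(solvent_scale(0.35, 46.0, 1 / d**2))  # typical k_sol/B_sol magnitudes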
+ def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None, func_type="log_cosh", twin_data=None):
+     # fc_labs must have the solvent part at the end
+     miller_array = twin_data.asu if twin_data else hkldata.miller_array()
+     d_min = twin_data.d_min(sts[0].cell) if twin_data else hkldata.d_min_max()[0]
+     if use_solvent:
+         if mask is None:
+             Fmask = calc_Fmask(merge_models(sts), d_min, miller_array)
+         else:
+             fmask_gr = gemmi.transform_map_to_f_phi(mask)
+             Fmask = fmask_gr.get_value_by_hkl(miller_array)
+         if twin_data:
+             fc_sum = twin_data.f_calc[:, :-1].sum(axis=1)
+         else:
+             fc_sum = hkldata.df[fc_labs[:-1]].sum(axis=1).to_numpy()
+         fc_list = [fc_sum, Fmask]
+     else:
+         if twin_data:
+             fc_list = [twin_data.f_calc.sum(axis=1)]
+         else:
+             fc_list = [hkldata.df[fc_labs].sum(axis=1).to_numpy()]
+
+     scaling = LsqScale(func_type=func_type)
+     scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0, twin_data=twin_data)
+     scaling.scale()
+     b_iso = scaling.b_iso
+     k_aniso = hkldata.debye_waller_factors(b_cart=scaling.b_aniso)
+     hkldata.df["k_aniso"] = k_aniso  # we need it later when calculating stats
+
+     if use_solvent:
+         if twin_data:
+             s2 = numpy.asarray(twin_data.s2_array)
+         else:
+             s2 = 1. / hkldata.d_spacings().to_numpy()**2
+         Fbulk = Fmask * scaling.get_solvent_scale(scaling.k_sol, scaling.b_sol, s2)
+         if twin_data:
+             twin_data.f_calc[:, -1] = Fbulk
+         else:
+             hkldata.df[fc_labs[-1]] = Fbulk
+
+     # Apply scales
+     if use_int:
+         # in the intensity case, we try to refine b_aniso with ML; perhaps we should do it in the amplitude case also
+         o_labs = ["I", "SIGI", "I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]
+         hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall**2
+     else:
+         o_labs = ["FP", "SIGFP", "F(+)", "SIGF(+)", "F(-)", "SIGF(-)"]
+         hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall
+     if twin_data:
+         twin_data.f_calc[:] *= twin_data.debye_waller_factors(b_iso=b_iso)[:, None]
+     else:
+         k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
+         for lab in fc_labs: hkldata.df[lab] *= k_iso
+         # total Fc
+         hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
+     return scaling
+ # bulk_solvent_and_lsq_scales()
+
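Note: the debye_waller_factors calls above apply overall isotropic and anisotropic B corrections. Assuming the common convention k = exp(-s^T B s / 4), with s the reciprocal-lattice vector in Cartesian coordinates (|s| = 1/d) and B in A^2, a self-contained sketch:

    import numpy
    import gemmi

    # Hedged sketch of an anisotropic Debye-Waller factor; for isotropic B
    # this reduces to exp(-B / (4 d^2)).
    def aniso_dw(cell, hkl, b_cart):
        frac = numpy.array(cell.frac.mat.tolist())  # Cartesian -> fractional matrix
        s = numpy.asarray(hkl) @ frac               # reciprocal vector in 1/A
        return numpy.exp(-0.25 * s @ numpy.asarray(b_cart) @ s)

    cell = gemmi.UnitCell(50, 60, 70, 90, 90, 90)
    print(aniso_dw(cell, (1, 2, 3), numpy.diag([1.0, 0.5, -1.5])))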
+ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out, use="all"):
+     nmodels = len(fc_labs)
+     hkldata.df["FWT"] = 0j * numpy.nan
+     hkldata.df["DELFWT"] = 0j * numpy.nan
+     hkldata.df["FOM"] = numpy.nan
+     hkldata.df["X"] = numpy.nan  # for FOM
+     has_ano = "F(+)" in hkldata.df and "F(-)" in hkldata.df
+     if has_ano:
+         hkldata.df["FAN"] = 0j * numpy.nan
+     stats_data = []
+     k_ani = hkldata.debye_waller_factors(b_cart=b_aniso)
+     Ds = numpy.vstack([hkldata.df[lab].to_numpy() for lab in D_labs]).T
+     Fcs = numpy.vstack([hkldata.df[lab].to_numpy() for lab in fc_labs]).T
+     DFc = (Ds * Fcs).sum(axis=1)
+     hkldata.df["DFC"] = DFc
+     for i_bin, idxes in hkldata.binned():
+         bin_d_min = hkldata.binned_df.d_min[i_bin]
+         bin_d_max = hkldata.binned_df.d_max[i_bin]
+         # 0: acentric, 1: centric
+         mean_fom = [numpy.nan, numpy.nan]
+         nrefs = [0, 0]
+         for c, work, test in centric_and_selections[i_bin]:
+             cidxes = numpy.concatenate([work, test])
+             S = hkldata.df["S"].to_numpy()[cidxes]
+             expip = numpy.exp(numpy.angle(DFc[cidxes]) * 1j)
+             Fo = hkldata.df.FP.to_numpy()[cidxes] / k_ani[cidxes]
+             SigFo = hkldata.df.SIGFP.to_numpy()[cidxes] / k_ani[cidxes]
+             epsilon = hkldata.df.epsilon.to_numpy()[cidxes]
+             nrefs[c] = numpy.sum(numpy.isfinite(Fo))
+             DFc_abs = numpy.abs(DFc[cidxes])
+             if c == 0:
+                 Sigma = 2 * SigFo**2 + epsilon * S
+                 X = 2 * Fo * DFc_abs / Sigma
+                 m = gemmi.bessel_i1_over_i0(X)
+                 hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
+             else:
+                 Sigma = SigFo**2 + epsilon * S
+                 X = Fo * DFc_abs / Sigma
+                 m = numpy.tanh(X)
+                 hkldata.df.loc[cidxes, "FWT"] = (m * Fo) * expip
+
+             hkldata.df.loc[cidxes, "DELFWT"] = (m * Fo - DFc_abs) * expip
+             hkldata.df.loc[cidxes, "FOM"] = m
+             hkldata.df.loc[cidxes, "X"] = X
+             if has_ano:
+                 Fo_dano = (hkldata.df["F(+)"].to_numpy()[cidxes] - hkldata.df["F(-)"].to_numpy()[cidxes]) / k_ani[cidxes]
+                 hkldata.df.loc[cidxes, "FAN"] = m * Fo_dano * expip / 2j
+             if nrefs[c] > 0: mean_fom[c] = numpy.nanmean(m)
+
+             # remove reflections that should be hidden
+             if use != "all":
+                 # usually use == "work"
+                 tohide = test if use == "work" else work
+                 hkldata.df.loc[tohide, "FWT"] = 0j * numpy.nan
+                 hkldata.df.loc[tohide, "DELFWT"] = 0j * numpy.nan
+             fill_sel = numpy.isnan(hkldata.df["FWT"][cidxes].to_numpy())
+             hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]
+
+         Fc = hkldata.df.FC.to_numpy()[idxes] * k_ani[idxes]
+         Fo = hkldata.df.FP.to_numpy()[idxes]
+         mean_DFc2 = numpy.nanmean(numpy.abs((Ds[idxes, :] * Fcs[idxes, :]).sum(axis=1) * k_ani[idxes])**2)
+         mean_log_DFcs = numpy.log(numpy.nanmean(numpy.abs(Ds[idxes, :] * Fcs[idxes, :] * k_ani[idxes, None]), axis=0)).tolist()
+         mean_Ds = numpy.nanmean(Ds[idxes, :], axis=0).tolist()
+         if sum(nrefs) > 0:
+             r = numpy.nansum(numpy.abs(numpy.abs(Fc) - Fo)) / numpy.nansum(Fo)
+             cc = utils.hkl.correlation(Fo, numpy.abs(Fc))
+             mean_Fo2 = numpy.nanmean(numpy.abs(Fo)**2)
+         else:
+             r, cc, mean_Fo2 = numpy.nan, numpy.nan, numpy.nan
+         stats_data.append([i_bin, nrefs[0], nrefs[1], bin_d_max, bin_d_min,
+                            numpy.log(mean_Fo2),
+                            numpy.log(numpy.nanmean(numpy.abs(Fc)**2)),
+                            numpy.log(mean_DFc2),
+                            numpy.log(numpy.mean(hkldata.df["S"].to_numpy()[idxes])),
+                            mean_fom[0], mean_fom[1], r, cc] + mean_Ds + mean_log_DFcs)
+
+     DFc_labs = ["log(Mn(|{}{}|))".format(dl, fl) for dl, fl in zip(D_labs, fc_labs)]
+     cols = ["bin", "n_a", "n_c", "d_max", "d_min",
+             "log(Mn(|Fo|^2))", "log(Mn(|Fc|^2))", "log(Mn(|DFc|^2))",
+             "log(Sigma)", "FOM_a", "FOM_c", "R", "CC(|Fo|,|Fc|)"] + D_labs + DFc_labs
+     stats = pandas.DataFrame(stats_data, columns=cols)
+     title_labs = [["log(Mn(|F|^2)) and variances", ["log(Mn(|Fo|^2))", "log(Mn(|Fc|^2))", "log(Mn(|DFc|^2))", "log(Sigma)"]],
+                   ["FOM", ["FOM_a", "FOM_c"]],
+                   ["D", D_labs],
+                   ["DFc", DFc_labs],
+                   ["R-factor", ["R"]],
+                   ["CC", ["CC(|Fo|,|Fc|)"]],
+                   ["number of reflections", ["n_a", "n_c"]]]
+     with open(log_out, "w") as ofs:
+         ofs.write(utils.make_loggraph_str(stats, main_title="Statistics",
+                                           title_labs=title_labs,
+                                           s2=1 / stats["d_min"]**2))
+     logger.writeln("output log: {}".format(log_out))
+ # calculate_maps()
+
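Note: the map coefficients above are the standard maximum-likelihood ones, 2mFo - D|Fc| (acentric) and mFo (centric) for FWT, and mFo - D|Fc| for DELFWT, with the figure of merit m = I1(X)/I0(X) for acentrics and m = tanh(X) for centrics. A toy numeric check of the two FOM expressions, using made-up values:

    import numpy
    from scipy.special import i0, i1

    Fo, DFc_abs, SigFo, S, eps = 100.0, 80.0, 5.0, 2000.0, 1.0

    # acentric: Sigma = 2*SIGF^2 + eps*S, X = 2*Fo*|DFc|/Sigma, m = I1(X)/I0(X)
    Xa = 2 * Fo * DFc_abs / (2 * SigFo**2 + eps * S)
    print(i1(Xa) / i0(Xa))  # same as gemmi.bessel_i1_over_i0(Xa)

    # centric: Sigma = SIGF^2 + eps*S, X = Fo*|DFc|/Sigma, m = tanh(X)
    Xc = Fo * DFc_abs / (SigFo**2 + eps * S)
    print(numpy.tanh(Xc))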
+ def main(args):
+     n_per_bin = {"all": 500, "work": 500, "test": 50}[args.use]
+     try:
+         hkldata, sts, fc_labs, centric_and_selections, free = process_input(hklin=args.hklin,
+                                                                             labin=args.labin.split(",") if args.labin else None,
+                                                                             n_bins=args.nbins,
+                                                                             free=args.free,
+                                                                             xyzins=sum(args.model, []),
+                                                                             source=args.source,
+                                                                             d_max=args.d_max,
+                                                                             d_min=args.d_min,
+                                                                             n_per_bin=n_per_bin,
+                                                                             use=args.use,
+                                                                             max_bins=30,
+                                                                             keep_charges=args.keep_charges,
+                                                                             space_group=args.spacegroup)
+     except RuntimeError as e:
+         raise SystemExit("Error: {}".format(e))
+
+     if args.twin:
+         twin_data = find_twin_domains_from_data(hkldata)
+     else:
+         twin_data = None
+     if twin_data:
+         twin_data.setup_f_calc(len(sts) + (0 if args.no_solvent else 1))
+
+     update_fc(sts, fc_labs, d_min=hkldata.d_min_max()[0], monlib=None,
+               source=args.source, mott_bethe=(args.source == "electron"),
+               hkldata=hkldata, twin_data=twin_data)
+     is_int = "I" in hkldata.df
+
+     if args.mask:
+         mask = utils.fileio.read_ccp4_map(args.mask)[0]
+     else:
+         mask = None
+
+     # Overall scaling & bulk solvent
+     # FP/SIGFP will be scaled. Total FC will be added.
+     if not args.no_solvent:
+         fc_labs.append("Fbulk")
+     lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
+                                       use_int=is_int, mask=mask, twin_data=twin_data)
+     b_aniso = lsq.b_aniso
+     # stats
+     stats, overall = calc_r_and_cc(hkldata, centric_and_selections, twin_data)
+     for lab in "R", "CC":
+         logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
+     if is_int:
+         logger.writeln("R1 is calculated for reflections with I/sigma>2.")
+
+     if twin_data:
+         estimate_twin_fractions_from_model(twin_data, hkldata)
+         #del hkldata.df["FC"]
+         #del hkldata.df["Fbulk"]
+         # Need to redo scaling?
+         lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
+                                           use_int=is_int, mask=mask, twin_data=twin_data)
+         b_aniso = lsq.b_aniso
+         stats, overall = calc_r_and_cc(hkldata, centric_and_selections, twin_data)
+         for lab in "R", "CC":
+             logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
+
+     # Estimate ML parameters
+     D_labs = ["D{}".format(i) for i in range(len(fc_labs))]
+
+     if args.use_cc:
+         assert not is_int
+         assert not args.twin
+         logger.writeln("Estimating sigma-A parameters from CC..")
+         determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, args.use)
+     else:
+         b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, centric_and_selections, args.D_trans, args.S_trans, args.use,
+                                       twin_data=twin_data)
+     use = {"all": "all", "work": "work", "test": "work"}[args.use]
+     if twin_data:
+         # replace hkldata
+         hkldata = calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_and_selections, use)
+     elif is_int:
+         calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections, use)
+     else:
+         log_out = "{}.log".format(args.output_prefix)
+         calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out, use)
+
+     # Write mtz file
+     if twin_data:
+         labs = ["F_est", "F_exp"]
+     elif is_int:
+         labs = ["I", "SIGI"]
+     else:
+         labs = ["FP", "SIGFP"]
+     labs.extend(["FOM", "FWT", "DELFWT", "FC", "DFC"])
+     if "FAN" in hkldata.df:
+         labs.append("FAN")
+     if not args.no_solvent:
+         labs.append("Fbulk")
+     if "FREE" in hkldata.df:
+         labs.append("FREE")
+     if "F_true_est" in hkldata.df:
+         labs.append("F_true_est")
+     labs += D_labs + ["S"]
+     mtz_out = args.output_prefix + ".mtz"
+     hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP": "F", "SIGFP": "Q", "F_est": "F", "F_exp": "F"})
+     return hkldata
+ # main()
+
+ if __name__ == "__main__":
+     import sys
+     args = parse_args(sys.argv[1:])
+     main(args)
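Note: a sketch of driving this module programmatically, mirroring the __main__ block above. parse_args is defined earlier in this file; the flag spellings "--model" and "--d_min" are inferred from how main() uses the parsed arguments and may differ, and the file names are placeholders:

    from servalcat.xtal import sigmaa

    argv = ["--hklin", "data.mtz", "--labin", "FP,SIGFP",
            "--model", "model.pdb", "--d_min", "1.8"]
    hkldata = sigmaa.main(sigmaa.parse_args(argv))
    print(hkldata.df[["FWT", "DELFWT", "FOM"]].head())  # ML map coefficients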