servalcat-0.4.72-cp312-cp312-macosx_11_0_arm64.whl → servalcat-0.4.99-cp312-cp312-macosx_11_0_arm64.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.


servalcat/xtal/sigmaa.py CHANGED
@@ -17,6 +17,7 @@ import scipy.optimize
 from servalcat.utils import logger
 from servalcat import utils
 from servalcat import ext
+from servalcat.xtal.twin import find_twin_domains_from_data, estimate_twin_fractions_from_model
 
 """
 DFc = sum_j D_j F_c,j
@@ -29,6 +30,8 @@ def add_arguments(parser):
     parser.description = 'Sigma-A parameter estimation for crystallographic data'
     parser.add_argument('--hklin', required=True,
                         help='Input MTZ file')
+    parser.add_argument('--spacegroup',
+                        help='Override space group')
     parser.add_argument('--labin',
                         help='MTZ column for F,SIGF,FREE')
     parser.add_argument('--free', type=int,
@@ -51,6 +54,7 @@ def add_arguments(parser):
                         help="Use CC(|F1|,|F2|) to CC(F1,F2) conversion to derive D and S")
     parser.add_argument('--use', choices=["all", "work", "test"], default="all",
                         help="Which reflections to be used for the parameter estimate.")
+    parser.add_argument('--twin', action="store_true", help="Turn on twin refinement")
     parser.add_argument('--mask',
                         help="A solvent mask (by default calculated from the coordinates)")
     parser.add_argument('--keep_charges', action='store_true',
@@ -71,22 +75,28 @@ def nanaverage(cc, w):
         return numpy.nan
     return numpy.average(cc[sel], weights=w[sel])
 
-def calc_r_and_cc(hkldata, centric_and_selections):
+def calc_r_and_cc(hkldata, centric_and_selections, twin_data=None):
     has_int = "I" in hkldata.df
     has_free = "FREE" in hkldata.df
     stats = hkldata.binned_df.copy()
-    stats["n_obs"] = 0
+    stats[["n_obs", "n_all"]] = 0
     if has_free:
         stats[["n_work", "n_free"]] = 0
-    rlab = "R2" if has_int else "R"
+    rlab = "R1" if has_int else "R"
     cclab = "CCI" if has_int else "CCF"
-    Fc = numpy.abs(hkldata.df.FC * hkldata.df.k_aniso)
+    if twin_data:
+        Fc = numpy.sqrt(twin_data.i_calc_twin())
+    else:
+        Fc = numpy.abs(hkldata.df.FC * hkldata.df.k_aniso)
     if has_int:
         obs = hkldata.df.I
+        obs_sqrt = numpy.sqrt(numpy.maximum(0, hkldata.df.I))
+        obs_sqrt[hkldata.df.I/hkldata.df.SIGI < 2] = numpy.nan # SHELX equivalent
         calc = Fc**2
+        calc_sqrt = Fc
     else:
-        obs = hkldata.df.FP
-        calc = Fc
+        obs = obs_sqrt = hkldata.df.FP
+        calc = calc_sqrt = Fc
     if has_free:
         for lab in (cclab, rlab):
             for suf in ("work", "free"):
@@ -97,15 +107,16 @@ def calc_r_and_cc(hkldata, centric_and_selections):
 
     for i_bin, idxes in hkldata.binned():
         stats.loc[i_bin, "n_obs"] = numpy.sum(numpy.isfinite(obs[idxes]))
+        stats.loc[i_bin, "n_all"] = len(idxes)
         if has_free:
             for j, suf in ((1, "work"), (2, "free")):
                 idxes2 = numpy.concatenate([sel[j] for sel in centric_and_selections[i_bin]])
                 stats.loc[i_bin, "n_"+suf] = numpy.sum(numpy.isfinite(obs[idxes2]))
                 stats.loc[i_bin, cclab+suf] = utils.hkl.correlation(obs[idxes2], calc[idxes2])
-                stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(obs[idxes2], calc[idxes2])
+                stats.loc[i_bin, rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes2], calc_sqrt[idxes2])
         else:
             stats.loc[i_bin, cclab] = utils.hkl.correlation(obs[idxes], calc[idxes])
-            stats.loc[i_bin, rlab] = utils.hkl.r_factor(obs[idxes], calc[idxes])
+            stats.loc[i_bin, rlab] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
 
     # Overall
     ret = {}
@@ -114,7 +125,7 @@ def calc_r_and_cc(hkldata, centric_and_selections):
             ret[cclab+suf+"avg"] = nanaverage(stats[cclab+suf], stats["n_"+suf])
         for j, suf in ((1, "work"), (2, "free")):
             idxes = numpy.concatenate([sel[j] for i_bin, _ in hkldata.binned() for sel in centric_and_selections[i_bin]])
-            ret[rlab+suf] = utils.hkl.r_factor(obs[idxes], calc[idxes])
+            ret[rlab+suf] = utils.hkl.r_factor(obs_sqrt[idxes], calc_sqrt[idxes])
     else:
         ret[cclab+"avg"] = nanaverage(stats[cclab], stats["n_obs"])
         ret[rlab] = utils.hkl.r_factor(obs, calc)
@@ -158,46 +169,63 @@ class LsqScale:
         self.b_aniso = None
         self.stats = {}
 
-    def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None):
+    def set_data(self, hkldata, fc_list, use_int=False, sigma_cutoff=None, twin_data=None):
         assert 0 < len(fc_list) < 3
         self.use_int = use_int
         if sigma_cutoff is not None:
             if use_int:
-                sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
+                self.sel = hkldata.df.I / hkldata.df.SIGI > sigma_cutoff
                 self.labcut = "(I/SIGI>{})".format(sigma_cutoff)
             else:
-                sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
+                self.sel = hkldata.df.FP / hkldata.df.SIGFP > sigma_cutoff
                 self.labcut = "(F/SIGF>{})".format(sigma_cutoff)
         else:
-            sel = hkldata.df.index
+            self.sel = hkldata.df.index
            self.labcut = ""
-        self.obs = hkldata.df["I" if use_int else "FP"].to_numpy()[sel]
-        self.calc = [x[sel] for x in fc_list]
-        self.s2mat = hkldata.ssq_mat()[:,sel]
-        self.s2 = 1. / hkldata.d_spacings().to_numpy()[sel]**2
+        self.obs = hkldata.df["I" if use_int else "FP"].to_numpy(copy=True)
+        self.obs[~self.sel] = numpy.nan
+        self.calc = [x for x in fc_list]
+        self.s2mat = hkldata.ssq_mat()
+        self.s2 = 1. / hkldata.d_spacings().to_numpy()**2
         self.adpdirs = utils.model.adp_constraints(hkldata.sg.operations(), hkldata.cell, tr0=False)
+        self.twin_data = twin_data
         if use_int:
             self.sqrt_obs = numpy.sqrt(self.obs)
 
     def get_solvent_scale(self, k_sol, b_sol, s2=None):
         if s2 is None: s2 = self.s2
         return k_sol * numpy.exp(-b_sol * s2 / 4)
-
-    def scaled_fc(self, x):
+
+    def fc_and_mask_grad(self, x):
        fc0 = self.calc[0]
         if len(self.calc) == 2:
-            fmask = self.calc[1]
-            fbulk = self.get_solvent_scale(x[-2], x[-1]) * fmask
-            fc = fc0 + fbulk
+            if self.twin_data:
+                r = self.twin_data.scaling_fc_and_mask_grad(self.calc[1], x[-2], x[-1])
+                return r[:,0], r[:,1], r[:,2]
+            else:
+                fmask = self.calc[1]
+                temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
+                fbulk = x[-2] * temp_sol * fmask
+                fc = fc0 + fbulk
+                re_fmask_fcconj = (fmask * fc.conj()).real
+                fc_abs = numpy.abs(fc)
+                tmp = temp_sol / fc_abs * re_fmask_fcconj
+                return fc_abs, tmp, -tmp * x[-2] * self.s2 / 4
         else:
-            fc = fc0
+            if self.twin_data:
+                return numpy.sqrt(self.twin_data.i_calc_twin()), None, None
+            else:
+                return numpy.abs(fc0), None, None
+
+    def scaled_fc(self, x):
+        fc = self.fc_and_mask_grad(x)[0]
         nadp = self.adpdirs.shape[0]
         B = numpy.dot(x[1:nadp+1], self.adpdirs)
         kani = numpy.exp(numpy.dot(-B, self.s2mat))
         return self.k_trans(x[0]) * kani * fc
 
     def target(self, x):
-        y = numpy.abs(self.scaled_fc(x))
+        y = self.scaled_fc(x)
         if self.use_int:
             diff = self.sqrt_obs - y
             #y2 = y**2
@@ -214,18 +242,10 @@ class LsqScale:
 
     def grad(self, x):
         g = numpy.zeros_like(x)
-        fc0 = self.calc[0]
-        if len(self.calc) == 2:
-            fmask = self.calc[1]
-            temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
-            fbulk = x[-2] * temp_sol * fmask
-            fc = fc0 + fbulk
-        else:
-            fc = fc0
+        fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
         nadp = self.adpdirs.shape[0]
         B = numpy.dot(x[1:nadp+1], self.adpdirs)
         kani = numpy.exp(numpy.dot(-B, self.s2mat))
-        fc_abs = numpy.abs(fc)
         k = self.k_trans(x[0])
         y = k * kani * fc_abs
         if self.use_int:
@@ -247,30 +267,19 @@ class LsqScale:
         g[0] = numpy.nansum(kani * fc_abs * dfdy * self.k_trans_der(x[0]))
         g[1:nadp+1] = numpy.dot(dfdb, self.adpdirs.T)
         if len(self.calc) == 2:
-            re_fmask_fcconj = (fmask * fc.conj()).real
-            tmp = k * kani * temp_sol / fc_abs * re_fmask_fcconj
-            g[-2] = numpy.nansum(tmp * dfdy)
-            g[-1] = numpy.nansum(-tmp * dfdy * x[-2] * self.s2 / 4)
+            g[-2] = numpy.nansum(k * kani * der_ksol * dfdy)
+            g[-1] = numpy.nansum(k * kani * der_bsol * dfdy)
 
         return g
 
     def calc_shift(self, x):
         # TODO: sort out code duplication, if we use this.
-        g = numpy.zeros((len(self.calc[0]), len(x)))
+        g = numpy.zeros((len(self.obs), len(x)))
         H = numpy.zeros((len(x), len(x)))
-
-        fc0 = self.calc[0]
-        if len(self.calc) == 2:
-            fmask = self.calc[1]
-            temp_sol = numpy.exp(-x[-1] * self.s2 / 4)
-            fbulk = x[-2] * temp_sol * fmask
-            fc = fc0 + fbulk
-        else:
-            fc = fc0
+        fc_abs, der_ksol, der_bsol = self.fc_and_mask_grad(x)
         nadp = self.adpdirs.shape[0]
         B = numpy.dot(x[1:nadp+1], self.adpdirs)
         kani = numpy.exp(numpy.dot(-B, self.s2mat))
-        fc_abs = numpy.abs(fc)
         k = self.k_trans(x[0])
         y = k * kani * fc_abs
         if self.use_int:
@@ -297,27 +306,20 @@ class LsqScale:
         g[:,0] = kani * fc_abs * self.k_trans_der(x[0])
         g[:,1:nadp+1] = numpy.dot(dfdb.T, self.adpdirs.T)
         if len(self.calc) == 2:
-            re_fmask_fcconj = (fmask * fc.conj()).real
-            tmp = k * kani * temp_sol / fc_abs * re_fmask_fcconj
-            g[:,-2] = tmp
-            g[:,-1] = -tmp * x[-2] * self.s2 / 4
+            g[:,-2] = k * kani * der_ksol
+            g[:,-1] = k * kani * der_bsol
 
-        # XXX won't work with NaNs.
+        # no numpy.nandot..
+        g, dfdy, dfdy2 = g[self.sel, :], dfdy[self.sel], dfdy2[self.sel]
         H = numpy.dot(g.T, g * dfdy2[:,None])
         g = numpy.sum(dfdy[:,None] * g, axis=0)
         dx = -numpy.dot(g, numpy.linalg.pinv(H))
         return dx
 
     def initial_kb(self):
-        fc0 = self.calc[0]
-        if len(self.calc) == 2:
-            fmask = self.calc[1]
-            fbulk = self.get_solvent_scale(self.k_sol, self.b_sol) * fmask
-            fc = fc0 + fbulk
-        else:
-            fc = fc0
-        sel = self.obs > 0
-        f1p, f2p, s2p = self.obs[sel], numpy.abs(fc)[sel], self.s2[sel]
+        fc_abs = self.fc_and_mask_grad([self.k_sol, self.b_sol])[0]
+        sel = self.obs > 0 # exclude nan as well
+        f1p, f2p, s2p = self.obs[sel], fc_abs[sel], self.s2[sel]
         if self.use_int: f2p *= f2p
         tmp = numpy.log(f2p) - numpy.log(f1p)
         # g = [dT/dk, dT/db]
@@ -418,7 +420,7 @@ class LsqScale:
             self.k_sol = res_x[-2]
             self.b_sol = res_x[-1]
             logger.writeln(" k_sol= {:.2e} B_sol= {:.2e}".format(self.k_sol, self.b_sol))
-        calc = numpy.abs(self.scaled_fc(res_x))
+        calc = self.scaled_fc(res_x)
         if self.use_int: calc *= calc
         self.stats["cc"] = utils.hkl.correlation(self.obs, calc)
         self.stats["r"] = utils.hkl.r_factor(self.obs, calc)
@@ -510,6 +512,43 @@ def mli_shift_S(df, fc_labs, Ds, S, k_ani, idxes):
     return -g / H
 # mli_shift_S()
 
+def mltwin_est_ftrue(twin_data, df, k_ani, idxes):
+    kani2_inv = 1 / k_ani**2
+    i_sigi = numpy.empty((2, len(df.index)))
+    i_sigi[:] = numpy.nan
+    i_sigi[0, idxes] = (df.I.to_numpy() * kani2_inv)[idxes]
+    i_sigi[1, idxes] = (df.SIGI.to_numpy() * kani2_inv)[idxes]
+    twin_data.est_f_true(i_sigi[0,:], i_sigi[1,:])
+# mltwin_est_ftrue()
+
+def mltwin(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+    twin_data.ml_sigma[i_bin] = S
+    twin_data.ml_scale[i_bin, :] = Ds
+    mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+    return twin_data.ll()
+# mltwin()
+
+def deriv_mltwin_wrt_D_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+    twin_data.ml_sigma[i_bin] = S
+    twin_data.ml_scale[i_bin, :] = Ds
+    mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+    r = twin_data.ll_der_D_S()
+    g = numpy.zeros(r.shape[1])
+    g[:-1] = numpy.nansum(r[:,:-1], axis=0) # D
+    g[-1] = numpy.nansum(r[:,-1]) # S
+    return g
+# deriv_mlf_wrt_D_S()
+
+def mltwin_shift_S(df, twin_data, Ds, S, k_ani, idxes, i_bin):
+    twin_data.ml_sigma[i_bin] = S
+    twin_data.ml_scale[i_bin, :] = Ds
+    mltwin_est_ftrue(twin_data, df, k_ani, idxes)
+    r = twin_data.ll_der_D_S()
+    g = numpy.nansum(r[:,-1])
+    H = numpy.nansum(r[:,-1]**2) # approximating expectation value of second derivative
+    return -g / H
+# mlf_shift_S()
+
 def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, use="all", smoothing="gauss"):
     # theorhetical values
     cc_a = lambda cc: (numpy.pi/4*(1-cc**2)**2 * scipy.special.hyp2f1(3/2, 3/2, 1, cc**2) - numpy.pi/4) / (1-numpy.pi/4)
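mltwin_shift_S above avoids computing true second derivatives: the Hessian for S is approximated by the sum of squared per-reflection first derivatives (a Fisher-scoring style approximation of the expected second derivative), so the update reduces to a plain -g/H step. A condensed sketch of that update, taking the per-reflection S-derivative column from ll_der_D_S() as in the diff:

    import numpy

    def scoring_shift_S(dll_dS):
        # dll_dS: per-reflection first derivatives of the log-likelihood w.r.t. S
        g = numpy.nansum(dll_dS)
        H = numpy.nansum(dll_dS**2)  # approximates E[d2LL/dS2]
        return -g / H                # quasi-Newton step for S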
@@ -594,7 +633,7 @@ def determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selection
     smooth_params(hkldata, D_labs, smoothing)
 # determine_mlf_params_from_cc()
 
-def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use):
+def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use, twin_data=None):
     # Initial values
     for lab in D_labs: hkldata.binned_df[lab] = 1.
     hkldata.binned_df["S"] = 10000.
@@ -614,8 +653,11 @@ def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selectio
             Io = hkldata.df.I.to_numpy()[idxes]
         else:
             Io = hkldata.df.FP.to_numpy()[idxes]**2
-        Io /= k_ani[idxes]**2
-        Ic = numpy.abs(hkldata.df.FC.to_numpy()[idxes])**2
+        Io /= k_ani[idxes]**2
+        if twin_data:
+            Ic = twin_data.i_calc_twin()[idxes]
+        else:
+            Ic = numpy.abs(hkldata.df.FC.to_numpy()[idxes])**2
         mean_Io = numpy.mean(Io)
         mean_Ic = numpy.mean(Ic)
         cc = numpy.corrcoef(Io, Ic)[1,0]
@@ -635,16 +677,21 @@ def initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selectio
         min_D = hkldata.binned_df[D_lab][hkldata.binned_df[D_lab] > 0].min() * 0.1
         logger.writeln("WARNING: negative {} is detected from initial estimates. Replacing it using minimum positive value {:.2e}".format(D_lab, min_D))
         hkldata.binned_df[D_lab].where(hkldata.binned_df[D_lab] > 0, min_D, inplace=True) # arbitrary
-
+
+    if twin_data:
+        twin_data.ml_scale[:] = hkldata.binned_df.loc[:, D_labs]
+        twin_data.ml_sigma[:] = hkldata.binned_df.loc[:, "S"]
+
     logger.writeln("Initial estimates:")
     logger.writeln(hkldata.binned_df.to_string())
 # initialize_ml_params()
 
 def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_selections,
-                        D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss"):
+                        D_trans=None, S_trans=None, use="all", n_cycle=1, smoothing="gauss",
+                        twin_data=None):
     assert use in ("all", "work", "test")
     assert smoothing in (None, "gauss")
-    logger.writeln("Estimating sigma-A parameters using {}..".format("intensities" if use_int else "amplitudes"))
+    logger.writeln("Estimating sigma-A parameters using {}..".format(("intensities" if use_int else "amplitudes") + " (twin)" if twin_data else ""))
     trans = VarTrans(D_trans, S_trans)
     lab_obs = "I" if use_int else "FP"
     def get_idxes(i_bin):
655
702
  return numpy.concatenate([sel[i] for sel in centric_and_selections[i_bin]])
656
703
 
657
704
  if not set(D_labs + ["S"]).issubset(hkldata.binned_df):
658
- initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use)
705
+ initialize_ml_params(hkldata, use_int, D_labs, b_aniso, centric_and_selections, use, twin_data=twin_data)
659
706
  for dlab, fclab in zip(D_labs, fc_labs):
660
707
  hkldata.binned_df["Mn(|{}*{}|)".format(dlab, fclab)] = numpy.nan
661
708
 
@@ -681,8 +728,12 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
             else:
                 Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
                 S = trans.S(x[-1])
-            f = mli if use_int else mlf
-            return f(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
+
+            if twin_data:
+                return mltwin(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
+            else:
+                f = mli if use_int else mlf
+                return f(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
 
         def grad(x):
             if refpar == "all":
@@ -697,8 +748,11 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
                 Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
                 S = trans.S(x[-1])
                 n_par = 1
-            calc_deriv = deriv_mli_wrt_D_S if use_int else deriv_mlf_wrt_D_S
-            r = calc_deriv(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
+            if twin_data:
+                r = deriv_mltwin_wrt_D_S(hkldata.df, twin_data, Ds, S, k_ani, idxes, i_bin)
+            else:
+                calc_deriv = deriv_mli_wrt_D_S if use_int else deriv_mlf_wrt_D_S
+                r = calc_deriv(hkldata.df, fc_labs, Ds, S, k_ani, idxes)
             g = numpy.zeros(n_par)
             if refpar in ("all", "D"):
                 g[:len(fc_labs)] = r[:len(fc_labs)]
@@ -723,6 +777,18 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
        for ids in range(10):
            refpar = "D"
            x0 = numpy.array([trans.D_inv(hkldata.binned_df.loc[i_bin, lab]) for lab in D_labs])
+           #print("MLTWIN=", target(x0))
+           #quit()
+           if 0:
+               h = 1e-3
+               f00 = target(x0)
+               g00 = grad(x0)
+               for ii in range(len(x0)):
+                   xx = x0.copy()
+                   xx[ii] += h
+                   f01 = target(xx)
+                   nder = (f01 - f00) / h
+                   logger.writeln(f"DEBUG_der_D bin_{i_bin} {ii} ad={g00[ii]} nd={nder} r={g00[ii]/nder}")
            vals_now = []
            if 0:
                f0 = target(x0)
@@ -758,15 +824,29 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
            for i, lab in enumerate(D_labs):
                hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
                vals_now.append(hkldata.binned_df.loc[i_bin, lab])
+           if twin_data:
+               twin_data.ml_scale[i_bin, :] = trans.D(res.x)
            refpar = "S"
            if 1:
                for cyc_s in range(1):
                    x0 = trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])
+                   if 0:
+                       h = 1e-1
+                       f00 = target([x0])
+                       g00 = grad([x0])
+                       xx = x0 + h
+                       f01 = target([xx])
+                       nder = (f01 - f00) / h
+                       logger.writeln(f"DEBUG_der_S bin_{i_bin} ad={g00} nd={nder} r={g00/nder}")
+
                    f0 = target([x0])
                    Ds = [hkldata.binned_df.loc[i_bin, lab] for lab in D_labs]
                    nfev_total += 1
-                   calc_shift_S = mli_shift_S if use_int else mlf_shift_S
-                   shift = calc_shift_S(hkldata.df, fc_labs, Ds, trans.S(x0), k_ani, idxes)
+                   if twin_data:
+                       shift = mltwin_shift_S(hkldata.df, twin_data, Ds, trans.S(x0), k_ani, idxes, i_bin)
+                   else:
+                       calc_shift_S = mli_shift_S if use_int else mlf_shift_S
+                       shift = calc_shift_S(hkldata.df, fc_labs, Ds, trans.S(x0), k_ani, idxes)
                    shift /= trans.S_deriv(x0)
                    if abs(shift) < 1e-3: break
                    for itry in range(10):
@@ -787,6 +867,8 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
                    else:
                        #print("all bad")
                        break
+               if twin_data:
+                   twin_data.ml_sigma[i_bin] = hkldata.binned_df.loc[i_bin, "S"]
            else:
                # somehow this does not work well.
                x0 = [trans.S_inv(hkldata.binned_df.loc[i_bin, "S"])]
@@ -796,6 +878,8 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
                #print(i_bin, "mini cycle", ids, refpar)
                #print(res)
                hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
+               if twin_data:
+                   twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
                vals_now.append(hkldata.binned_df.loc[i_bin, "S"])
            vals_now = numpy.array(vals_now)
            if vals_last is not None and numpy.all(numpy.abs((vals_last - vals_now) / vals_now) < 1e-2):
@@ -812,17 +896,30 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
            for i, lab in enumerate(D_labs):
                hkldata.binned_df.loc[i_bin, lab] = trans.D(res.x[i])
            hkldata.binned_df.loc[i_bin, "S"] = trans.S(res.x[-1])
+           if twin_data:
+               twin_data.ml_scale[i_bin, :] = trans.D(res.x[:-1])
+               twin_data.ml_sigma[i_bin] = trans.S(res.x[-1])
 
-    for i_bin, idxes in hkldata.binned():
-        for dlab, fclab in zip(D_labs, fc_labs):
-            mean_dfc = numpy.nanmean(numpy.abs(hkldata.binned_df[dlab][i_bin] * hkldata.df[fclab][idxes]))
-            hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc
+    if twin_data:
+        dfc = numpy.abs(twin_data.f_calc) * twin_data.ml_scale_array()
+        for i_bin, idxes in hkldata.binned():
+            dfc_bin = dfc[numpy.asarray(twin_data.bin)==i_bin,:]
+            mean_dfc = numpy.nanmean(dfc_bin, axis=0)
+            for i, (dlab, fclab) in enumerate(zip(D_labs, fc_labs)):
+                hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc[i]
+    else:
+        for i_bin, idxes in hkldata.binned():
+            for dlab, fclab in zip(D_labs, fc_labs):
+                mean_dfc = numpy.nanmean(numpy.abs(hkldata.binned_df[dlab][i_bin] * hkldata.df[fclab][idxes]))
+                hkldata.binned_df.loc[i_bin, "Mn(|{}*{}|)".format(dlab, fclab)] = mean_dfc
 
     logger.writeln("Refined estimates:")
     logger.writeln(hkldata.binned_df.to_string())
+    #numpy.testing.assert_allclose(hkldata.binned_df.S, twin_data.ml_sigma)
+    #numpy.testing.assert_allclose(hkldata.binned_df[D_labs], twin_data.ml_scale)
     logger.writeln("time: {:.1f} sec ({} evaluations)".format(time.time() - t0, nfev_total))
 
-    if not use_int:
+    if not use_int or twin_data:
        break # did not implement MLF B_aniso optimization
 
    # Refine b_aniso
@@ -900,7 +997,7 @@ def determine_ml_params(hkldata, use_int, fc_labs, D_labs, b_aniso, centric_and_
     return b_aniso
 # determine_ml_params()
 
-def smooth_params(hkldata, D_labs, smoothing):
+def smooth_params(hkldata, D_labs, smoothing): # XXX twin_data
     if smoothing is None or len(hkldata.binned()) < 2:
         for i, lab in enumerate(D_labs + ["S"]):
             hkldata.df[lab] = hkldata.binned_data_as_array(lab)
@@ -924,10 +1021,9 @@ def smooth_params(hkldata, D_labs, smoothing):
 # smooth_params()
 
 def expected_F_from_int(Io, sigIo, k_ani, DFc, eps, c, S):
-    if c == 0: # acentric
-        k_num, k_den = 0.5, 0.
-    else:
-        k_num, k_den = 0., -0.5
+    k_num = numpy.repeat(0.5 if c == 0 else 0., Io.size) # 0.5 if acentric
+    k_den = k_num - 0.5
+    if numpy.isscalar(c): c = numpy.repeat(c, Io.size)
     to = Io / sigIo - sigIo / (c+1) / k_ani**2 / S / eps
     tf = k_ani * numpy.abs(DFc) / numpy.sqrt(sigIo)
     sig1 = k_ani**2 * S * eps / sigIo
@@ -982,32 +1078,80 @@ def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections
         hkldata.df.loc[cidxes[fill_sel], "FWT"] = DFc[cidxes][fill_sel]
 # calculate_maps_int()
 
+def calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_and_selections, use="all"):
+    k_ani2_inv = 1 / hkldata.debye_waller_factors(b_cart=b_aniso)**2
+    Io = hkldata.df.I.to_numpy(copy=True) * k_ani2_inv
+    sigIo = hkldata.df.SIGI.to_numpy(copy=True) * k_ani2_inv
+    # Mask Io
+    for i_bin, idxes in hkldata.binned():
+        for c, work, test in centric_and_selections[i_bin]:
+            if use != "all":
+                tohide = test if use == "work" else work
+                Io[tohide] = numpy.nan
+
+    twin_data.est_f_true(Io, sigIo)
+    Ds = twin_data.ml_scale_array()
+    DFc = (twin_data.f_calc * Ds).sum(axis=1)
+    exp_ip = numpy.exp(numpy.angle(DFc)*1j)
+    Ft = numpy.asarray(twin_data.f_true_max)
+    m = twin_data.calc_fom()
+    Fexp = twin_data.expected_F(Io, sigIo)
+    if 1:
+        fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
+                          2 * m * Ft * exp_ip - DFc,
+                          m * Ft * exp_ip)
+        delfwt = m * Ft * exp_ip - DFc
+    else: # based on "more accurate" evaluation of <m|F|>
+        fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
+                          2 * Fexp * exp_ip - DFc,
+                          m * Fexp * exp_ip)
+        delfwt = Fexp * exp_ip - DFc
+
+    sel = numpy.isnan(fwt)
+    fwt[sel] = DFc[sel]
+
+    hkldata2 = utils.hkl.HklData(hkldata.cell, hkldata.sg,
+                                 utils.hkl.df_from_twin_data(twin_data, fc_labs))
+    hkldata2.df["FWT"] = fwt
+    hkldata2.df["DELFWT"] = delfwt
+    hkldata2.df["FOM"] = m
+    hkldata2.df["F_est"] = Ft
+    hkldata2.df["F_exp"] = Fexp
+    hkldata2.df["FC"] = twin_data.f_calc.sum(axis=1)
+    hkldata2.df["DFC"] = DFc
+    hkldata2.df[D_labs] = Ds
+    hkldata2.df["S"] = twin_data.ml_sigma_array()
+    return hkldata2
+# calculate_maps_twin()
+
 def merge_models(sts): # simply merge models. no fix in chain ids etc.
-    st = sts[0].clone()
-    del st[:]
-    model = gemmi.Model("1")
+    st2 = sts[0].clone()
+    del st2[:]
+    model = gemmi.Model(1)
     for st in sts:
         for m in st:
             for c in m:
                 model.add_chain(c)
-    st.add_model(model)
-    return st
+    st2.add_model(model)
+    return st2
 # merge_models()
 
-def decide_mtz_labels(mtz, find_free=True):
+def decide_mtz_labels(mtz, find_free=True, require=None):
+    # F is preferred for now by default
+    obs_types = ("F", "J", "G", "K")
+    if require:
+        assert set(require).issubset(obs_types)
+    else:
+        require = obs_types
     dlabs = utils.hkl.mtz_find_data_columns(mtz)
     logger.writeln("Finding possible options from MTZ:")
     for typ in dlabs:
         for labs in dlabs[typ]:
             logger.writeln(" --labin '{}'".format(",".join(labs)))
-    if dlabs["F"]: # F is preferred for now
-        labin = dlabs["F"][0]
-    elif dlabs["J"]:
-        labin = dlabs["J"][0]
-    elif dlabs["G"]:
-        labin = dlabs["G"][0]
-    elif dlabs["K"]:
-        labin = dlabs["K"][0]
+    for typ in require:
+        if dlabs[typ]:
+            labin = dlabs[typ][0]
+            break
     else:
         raise RuntimeError("Data not found from mtz")
     if find_free:
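calculate_maps_twin above builds the usual sigma-A style map coefficients from the twin-estimated amplitudes: acentric reflections get 2mFo − DFc, centrics get mFo, both taking phases from DFc, and reflections without an estimate fall back to DFc. A self-contained sketch of that rule:

    import numpy

    def twin_map_coeffs(m, Ft, DFc, centric):
        phase = numpy.exp(1j * numpy.angle(DFc))       # phases from DFc
        fwt = numpy.where(centric == 0,
                          2 * m * Ft * phase - DFc,    # acentric: 2mFo - DFc
                          m * Ft * phase)              # centric:  mFo
        delfwt = m * Ft * phase - DFc                  # difference map: mFo - DFc
        fwt = numpy.where(numpy.isnan(fwt), DFc, fwt)  # fill unestimated with DFc
        return fwt, delfwt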
@@ -1019,7 +1163,8 @@ def decide_mtz_labels(mtz, find_free=True):
 # decide_mtz_labels()
 
 def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=None,
-                  n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False):
+                  n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False,
+                  allow_unusual_occupancies=False, space_group=None):
     if labin: assert 1 < len(labin) < 6
     assert use in ("all", "work", "test")
     assert n_bins or n_per_bin #if n_bins not set, n_per_bin should be given
@@ -1042,6 +1187,9 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
         st, mtz = utils.fileio.read_small_molecule_files([hklin, xyzins[0]])
         sts = [st]
 
+    for st in sts:
+        utils.model.check_occupancies(st, raise_error=not allow_unusual_occupancies)
+
     if not labin:
         labin = decide_mtz_labels(mtz)
     col_types = {x.label:x.type for x in mtz.columns}
@@ -1067,6 +1215,12 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
     if hkldata.df.empty:
         raise RuntimeError("No data in hkl data")
 
+    if space_group is None:
+        sg_use = None
+    else:
+        sg_use = gemmi.SpaceGroup(space_group)
+        logger.writeln(f"Space group overridden by user. Using {sg_use.xhm()}")
+
     if sts:
         assert source in ["electron", "xray", "neutron"]
         for st in sts:
@@ -1079,39 +1233,43 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
         for st in sts: st.cell = hkldata.cell # mtz cell is used in any case
 
         sg_st = sts[0].find_spacegroup() # may be None
-        sg_use = hkldata.sg
-        if hkldata.sg != sg_st:
-            if st.cell.is_crystal() and sg_st and sg_st.laue_str() != hkldata.sg.laue_str():
-                raise RuntimeError("Crystal symmetry mismatch between model and data")
-            logger.writeln("Warning: space group mismatch between model and mtz")
-            if sg_st and sg_st.laue_str() == hkldata.sg.laue_str():
-                logger.writeln(" using space group from model")
-                sg_use = sg_st
-            else:
-                logger.writeln(" using space group from mtz")
-            logger.writeln("")
-
+        if sg_use is None:
+            sg_use = hkldata.sg
+            if hkldata.sg != sg_st:
+                if st.cell.is_crystal() and sg_st and sg_st.laue_str() != hkldata.sg.laue_str():
+                    raise RuntimeError("Crystal symmetry mismatch between model and data")
+                logger.writeln("Warning: space group mismatch between model and mtz")
+                if sg_st and sg_st.laue_str() == hkldata.sg.laue_str():
+                    logger.writeln(" using space group from model")
+                    sg_use = sg_st
+                else:
+                    logger.writeln(" using space group from mtz")
+                logger.writeln("")
+
         for st in sts:
             st.spacegroup_hm = sg_use.xhm()
             st.setup_cell_images()
-        hkldata.sg = sg_use
 
     if not keep_charges:
         utils.model.remove_charge(sts)
     utils.model.check_atomsf(sts, source)
 
+    if sg_use is not None:
+        hkldata.sg = sg_use
     if newlabels[0] == "FP":
         hkldata.remove_nonpositive(newlabels[0])
         hkldata.remove_nonpositive(newlabels[1])
     hkldata.switch_to_asu()
     hkldata.remove_systematic_absences()
     #hkldata.df = hkldata.df.astype({name: 'float64' for name in ["I","SIGI","FP","SIGFP"] if name in hkldata.df})
-
+    d_min_data = hkldata.d_min_max(newlabels)[0]
+    if d_min is None and hkldata.d_min_max()[0] != d_min_data:
+        d_min = d_min_data
+        logger.writeln(f"Changing resolution to {d_min:.3f} A")
     if (d_min, d_max).count(None) != 2:
         hkldata = hkldata.copy(d_min=d_min, d_max=d_max)
         if hkldata.df.empty:
             raise RuntimeError("No data left in hkl data")
-        d_min, d_max = hkldata.d_min_max()
 
     hkldata.complete()
     hkldata.sort_by_resolution()
@@ -1136,13 +1294,7 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
         hkldata.setup_binning(n_bins=n_bins)
     logger.writeln("Data completeness: {:.2f}%".format(hkldata.completeness()*100.))
 
-    fc_labs = []
-    for i, st in enumerate(sts):
-        lab = "FC{}".format(i)
-        hkldata.df[lab] = utils.model.calc_fc_fft(st, d_min-1e-6,
-                                                  source=source, mott_bethe=(source=="electron"),
-                                                  miller_array=hkldata.miller_array())
-        fc_labs.append(lab)
+    fc_labs = ["FC{}".format(i) for i, _ in enumerate(sts)]
 
     # Create a centric selection table for faster look up
     centric_and_selections = {}
@@ -1195,10 +1347,34 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
     return hkldata, sts, fc_labs, centric_and_selections, free
 # process_input()
 
+def update_fc(st_list, fc_labs, d_min, monlib, source, mott_bethe, hkldata=None, twin_data=None):
+    #assert (hkldata, twin_data).count(None) == 1
+    # hkldata not updated when twin_data is given
+    for i, st in enumerate(st_list):
+        if st.ncs:
+            st = st.clone()
+            st.expand_ncs(gemmi.HowToNameCopiedChain.Dup, merge_dist=0)
+        if twin_data:
+            hkl = twin_data.asu
+        else:
+            hkl = hkldata.miller_array()
+        fc = utils.model.calc_fc_fft(st, d_min - 1e-6,
+                                     monlib=monlib,
+                                     source=source,
+                                     mott_bethe=mott_bethe,
+                                     miller_array=hkl)
+        if twin_data:
+            twin_data.f_calc[:,i] = fc
+        else:
+            hkldata.df[fc_labs[i]] = fc
+    if not twin_data:
+        hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
+# update_fc()
+
 def calc_Fmask(st, d_min, miller_array):
     logger.writeln("Calculating solvent contribution..")
     grid = gemmi.FloatGrid()
-    grid.setup_from(st, spacing=min(0.6, d_min / 2 - 1e-9))
+    grid.setup_from(st, spacing=min(0.6, (d_min-1e-6) / 2 - 1e-9))
     masker = gemmi.SolventMasker(gemmi.AtomicRadiiSet.Refmac)
     masker.put_mask_on_float_grid(grid, st[0])
     fmask_gr = gemmi.transform_map_to_f_phi(grid)
@@ -1206,29 +1382,44 @@ def calc_Fmask(st, d_min, miller_array):
     return Fmask
 # calc_Fmask()
 
-def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None, func_type="log_cosh"):
-    fc_list = [hkldata.df[fc_labs].sum(axis=1).to_numpy()]
+def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int=False, mask=None, func_type="log_cosh", twin_data=None):
+    # fc_labs must have solvent part at the end
+    miller_array = twin_data.asu if twin_data else hkldata.miller_array()
+    d_min = twin_data.d_min(sts[0].cell) if twin_data else hkldata.d_min_max()[0]
     if use_solvent:
         if mask is None:
-            Fmask = calc_Fmask(merge_models(sts), hkldata.d_min_max()[0] - 1e-6, hkldata.miller_array())
+            Fmask = calc_Fmask(merge_models(sts), d_min, miller_array)
         else:
             fmask_gr = gemmi.transform_map_to_f_phi(mask)
-            Fmask = fmask_gr.get_value_by_hkl(hkldata.miller_array())
-        fc_list.append(Fmask)
+            Fmask = fmask_gr.get_value_by_hkl(miller_array)
+        if twin_data:
+            fc_sum = twin_data.f_calc[:,:-1].sum(axis=1)
+        else:
+            fc_sum = hkldata.df[fc_labs[:-1]].sum(axis=1).to_numpy()
+        fc_list = [fc_sum, Fmask]
+    else:
+        if twin_data:
+            fc_list = [twin_data.f_calc.sum(axis=1)]
+        else:
+            fc_list = [hkldata.df[fc_labs].sum(axis=1).to_numpy()]
 
     scaling = LsqScale(func_type=func_type)
-    scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0)
+    scaling.set_data(hkldata, fc_list, use_int, sigma_cutoff=0, twin_data=twin_data)
     scaling.scale()
     b_iso = scaling.b_iso
-    k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
     k_aniso = hkldata.debye_waller_factors(b_cart=scaling.b_aniso)
     hkldata.df["k_aniso"] = k_aniso # we need it later when calculating stats
 
     if use_solvent:
-        fc_labs.append("Fbulk")
-        solvent_scale = scaling.get_solvent_scale(scaling.k_sol, scaling.b_sol,
-                                                  1. / hkldata.d_spacings().to_numpy()**2)
-        hkldata.df[fc_labs[-1]] = Fmask * solvent_scale
+        if twin_data:
+            s2 = numpy.asarray(twin_data.s2_array)
+        else:
+            s2 = 1. / hkldata.d_spacings().to_numpy()**2
+        Fbulk = Fmask * scaling.get_solvent_scale(scaling.k_sol, scaling.b_sol, s2)
+        if twin_data:
+            twin_data.f_calc[:,-1] = Fbulk
+        else:
+            hkldata.df[fc_labs[-1]] = Fbulk
 
     # Apply scales
     if use_int:
@@ -1238,9 +1429,13 @@ def bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=True, use_int
     else:
         o_labs = ["FP", "SIGFP", "F(+)","SIGF(+)", "F(-)", "SIGF(-)"]
         hkldata.df[hkldata.df.columns.intersection(o_labs)] /= scaling.k_overall
-    for lab in fc_labs: hkldata.df[lab] *= k_iso
-    # total Fc
-    hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
+    if twin_data:
+        twin_data.f_calc[:] *= twin_data.debye_waller_factors(b_iso=b_iso)[:,None]
+    else:
+        k_iso = hkldata.debye_waller_factors(b_iso=b_iso)
+        for lab in fc_labs: hkldata.df[lab] *= k_iso
+        # total Fc
+        hkldata.df["FC"] = hkldata.df[fc_labs].sum(axis=1)
     return scaling
 # bulk_solvent_and_lsq_scales()
@@ -1353,10 +1548,21 @@ def main(args):
                               n_per_bin=n_per_bin,
                               use=args.use,
                               max_bins=30,
-                              keep_charges=args.keep_charges)
+                              keep_charges=args.keep_charges,
+                              space_group=args.spacegroup)
     except RuntimeError as e:
         raise SystemExit("Error: {}".format(e))
 
+    if args.twin:
+        twin_data = find_twin_domains_from_data(hkldata)
+    else:
+        twin_data = None
+    if twin_data:
+        twin_data.setup_f_calc(len(sts) + (0 if args.no_solvent else 1))
+
+    update_fc(sts, fc_labs, d_min=hkldata.d_min_max()[0], monlib=None,
+              source=args.source, mott_bethe=(args.source=="electron"),
+              hkldata=hkldata, twin_data=twin_data)
     is_int = "I" in hkldata.df
 
     if args.mask:
@@ -1366,46 +1572,70 @@ def main(args):
 
     # Overall scaling & bulk solvent
     # FP/SIGFP will be scaled. Total FC will be added.
+    if not args.no_solvent:
+        fc_labs.append("Fbulk")
     lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
-                                      use_int=is_int, mask=mask)
+                                      use_int=is_int, mask=mask, twin_data=twin_data)
     b_aniso = lsq.b_aniso
     # stats
-    stats, overall = calc_r_and_cc(hkldata, centric_and_selections)
+    stats, overall = calc_r_and_cc(hkldata, centric_and_selections, twin_data)
     for lab in "R", "CC":
         logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
+    if is_int:
+        logger.writeln("R1 is calculated for reflections with I/sigma>2.")
+
+    if twin_data:
+        estimate_twin_fractions_from_model(twin_data, hkldata)
+        #del hkldata.df["FC"]
+        #del hkldata.df["Fbulk"]
+        # Need to redo scaling?
+        lsq = bulk_solvent_and_lsq_scales(hkldata, sts, fc_labs, use_solvent=not args.no_solvent,
+                                          use_int=is_int, mask=mask, twin_data=twin_data)
+        b_aniso = lsq.b_aniso
+        stats, overall = calc_r_and_cc(hkldata, centric_and_selections, twin_data)
+        for lab in "R", "CC":
+            logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(lab)))
 
     # Estimate ML parameters
     D_labs = ["D{}".format(i) for i in range(len(fc_labs))]
 
     if args.use_cc:
         assert not is_int
+        assert not args.twin
         logger.writeln("Estimating sigma-A parameters from CC..")
         determine_mlf_params_from_cc(hkldata, fc_labs, D_labs, centric_and_selections, args.use)
     else:
-        b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, centric_and_selections, args.D_trans, args.S_trans, args.use)
-        if is_int:
-            calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections,
-                               use={"all": "all", "work": "work", "test": "work"}[args.use])
+        b_aniso = determine_ml_params(hkldata, is_int, fc_labs, D_labs, b_aniso, centric_and_selections, args.D_trans, args.S_trans, args.use,
+                                      twin_data=twin_data)
+        use = {"all": "all", "work": "work", "test": "work"}[args.use]
+        if twin_data:
+            # replace hkldata
+            hkldata = calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_and_selections, use)
+        elif is_int:
+            calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections, use)
         else:
             log_out = "{}.log".format(args.output_prefix)
-            calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out,
-                           use={"all": "all", "work": "work", "test": "work"}[args.use])
+            calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, log_out, use)
 
     # Write mtz file
-    if is_int:
-        labs = ["I", "SIGI", "FOM"]
+    if twin_data:
+        labs = ["F_est", "F_exp"]
+    elif is_int:
+        labs = ["I", "SIGI"]
     else:
-        labs = ["FP", "SIGFP", "FOM"]
-    labs.extend(["FWT", "DELFWT", "FC", "DFC"])
+        labs = ["FP", "SIGFP"]
+    labs.extend(["FOM", "FWT", "DELFWT", "FC", "DFC"])
     if "FAN" in hkldata.df:
         labs.append("FAN")
     if not args.no_solvent:
         labs.append("Fbulk")
     if "FREE" in hkldata.df:
         labs.append("FREE")
+    if "F_true_est" in hkldata.df:
+        labs.append("F_true_est")
     labs += D_labs + ["S"]
     mtz_out = args.output_prefix+".mtz"
-    hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q"})
+    hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q", "F_est": "F", "F_exp": "F"})
     return hkldata
 # main()
 if __name__ == "__main__":