servalcat 0.4.99-cp38-cp38-win_amd64.whl → 0.4.100-cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of servalcat might be problematic.

servalcat/__init__.py CHANGED
@@ -6,5 +6,5 @@ This software is released under the
  Mozilla Public License, version 2.0; see LICENSE.
  """
 
- __version__ = '0.4.99'
- __date__ = '2024-12-04'
+ __version__ = '0.4.100'
+ __date__ = '2025-01-22'
Binary file
@@ -271,7 +271,6 @@ def main(args):
  st_expanded = refiner.st.clone()
  if not all(op.given for op in st.ncs):
  utils.model.expand_ncs(st_expanded)
- utils.fileio.write_model(st_expanded, args.output_prefix+"_expanded", pdb=True, cif=True, hout=args.hout)
 
  # Calc FSC
  if args.hklin: # cannot update a mask
@@ -294,6 +293,10 @@
  update_meta(refiner.st, stats_for_meta, ll)
  refiner.st.name = args.output_prefix
  utils.fileio.write_model(refiner.st, args.output_prefix, pdb=True, cif=True, hout=args.hout)
+ if not all(op.given for op in st.ncs): # to apply updated metadata
+ st_expanded = refiner.st.clone()
+ utils.model.expand_ncs(st_expanded)
+ utils.fileio.write_model(st_expanded, args.output_prefix+"_expanded", pdb=True, cif=True, hout=args.hout)
  if args.hklin:
  return
  # Calc Fo-Fc (and updated) maps
@@ -129,19 +129,6 @@ def main(args):
  hklin = utils.fileio.read_mmhkl(hklin)
  labin = decide_mtz_labels(hklin)
 
- if labin and len(labin) == 3: # with test flags
- use_in_target = "work"
- if args.use_work_in_est:
- use_in_est = "work"
- n_per_bin = 100
- else:
- use_in_est = "test"
- n_per_bin = 50
- else:
- use_in_est = "all"
- use_in_target = "all"
- n_per_bin = 100
-
  try:
  hkldata, sts, fc_labs, centric_and_selections, args.free = process_input(hklin=hklin,
  labin=labin,
@@ -151,14 +138,23 @@ def main(args):
  source=args.source,
  d_max=args.d_max,
  d_min=args.d_min,
- n_per_bin=n_per_bin,
- use=use_in_est,
+ use="work" if args.use_work_in_est else "test",
  max_bins=30,
  keep_charges=args.keep_charges,
  allow_unusual_occupancies=args.allow_unusual_occupancies)
  except RuntimeError as e:
  raise SystemExit("Error: {}".format(e))
 
+ if "FREE" in hkldata.df:
+ use_in_target = "work"
+ if args.use_work_in_est:
+ use_in_est = "work"
+ else:
+ use_in_est = "test"
+ else:
+ use_in_est = "all"
+ use_in_target = "all"
+
  is_int = "I" in hkldata.df
  st = sts[0]
  utils.model.fix_deuterium_residues(st)
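Note: the reworked block above decides which reflections feed the sigma-A estimation (use_in_est: the work or the test set, depending on args.use_work_in_est) and which feed the refinement target (use_in_target: always the work set when free flags are present, otherwise everything). A minimal sketch of how such a split maps onto boolean selections, assuming an hkldata-like table with a FREE column (illustration only, not the servalcat API):

    # Illustration only (not the servalcat API): expressing a work/test split
    # as boolean selections on a reflection table with a FREE column.
    import pandas as pd

    def split_selections(df, free_flag, use_in_est):
        is_test = df["FREE"] == free_flag
        sel_target = ~is_test                                    # target always uses work reflections
        sel_est = is_test if use_in_est == "test" else ~is_test  # estimation may use either set
        return sel_est, sel_target

    df = pd.DataFrame({"FREE": [0, 1, 0, 0, 1]})
    est, target = split_selections(df, free_flag=0, use_in_est="test")
    print(est.sum(), target.sum())  # 3 test reflections for estimation, 2 work for the target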
servalcat/refine/xtal.py CHANGED
@@ -45,7 +45,7 @@ class LL_Xtal:
  self.ll = None
  self.scaling = sigmaa.LsqScale()
  if twin:
- self.twin_data = find_twin_domains_from_data(self.hkldata)
+ self.twin_data, _ = find_twin_domains_from_data(self.hkldata)
  else:
  self.twin_data = None
  if self.twin_data:
@@ -94,7 +94,7 @@ class LL_Xtal:
  fc_sum = self.hkldata.df[self.fc_labs[:-1]].sum(axis=1).to_numpy()
  fc_list = [fc_sum, Fmask]
  else:
- if twin_data:
+ if self.twin_data:
  fc_list = [self.twin_data.f_calc.sum(axis=1)]
  else:
  fc_list = [self.hkldata.df[self.fc_labs].sum(axis=1).to_numpy()]
@@ -235,11 +235,11 @@ class LL_Xtal:
  m = numpy.tanh(X)
  g = (Fc_abs - m * Fo) / Sigma * Ds[:,0]
  dll_dab[cidxes] = g * expip
- d2ll_dab2[cidxes] = (1. / Sigma - (Fo / (Sigma * numpy.cosh(X)))**2) * Ds[:,0]**2
+ d2ll_dab2[cidxes] = (1. / Sigma - (Fo / Sigma)**2 * (1. - m**2)) * Ds[:,0]**2
  dll_dab *= self.hkldata.debye_waller_factors(b_iso=-blur)
 
  if self.mott_bethe:
- d2 = 1 / self.twin_data.s2_array if self.twin_data else self.hkldata.d_spacings()**2
+ d2 = numpy.reciprocal(self.twin_data.s2_array) if self.twin_data else self.hkldata.d_spacings()**2
  dll_dab *= d2 * gemmi.mott_bethe_const()
  d2ll_dab2 *= gemmi.mott_bethe_const()**2
 
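Note: two of the hunks above are numerical clean-ups rather than behaviour changes. numpy.reciprocal(s2_array) computes the same 1/s² as the previous 1 / s2_array (matching the d_spacings()**2 used in the non-twin branch), and the centric curvature term relies on the identity 1/cosh²(X) = 1 − tanh²(X) = 1 − m², which is the same quantity but avoids evaluating cosh for large arguments. A quick check of the identity (illustration only):

    # Quick check: with m = tanh(X), 1/cosh(X)**2 equals 1 - m**2.
    import numpy as np

    X = np.linspace(0.1, 10.0, 50)
    m = np.tanh(X)
    print(np.allclose(1.0 / np.cosh(X) ** 2, 1.0 - m ** 2))  # True
    # For very large X, np.cosh(X) overflows float64 (RuntimeWarning), while
    # 1 - m**2 simply saturates to 0, so the rewritten form is the safer one.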
servalcat/utils/hkl.py CHANGED
@@ -493,6 +493,31 @@ class HklData:
  self.df = self.df[~sel]
  # remove_nonpositive()
 
+ def mask_invalid_obs_values(self, labels):
+ assert 1 < len(labels) < 6
+ assert labels[1].startswith("SIG")
+ def do_mask(label, target_labels):
+ sel = self.df[label] <= 0
+ n_bad = sel.sum()
+ if n_bad > 0:
+ logger.writeln("Removing {} reflections with {}<=0".format(n_bad, label))
+ self.df.loc[sel, target_labels] = numpy.nan
+ # If any element within target_labels is non-finite, mask all elements
+ self.df.loc[(~numpy.isfinite(self.df[target_labels])).any(axis=1), target_labels] = numpy.nan
+
+ if len(labels) < 4: # F/SIGF or I/SIGI
+ if labels[0].startswith("F"):
+ do_mask(labels[0], labels[:2]) # bad F
+ do_mask(labels[1], labels[:2]) # bad sigma
+ else: # I(+)/SIGI(+)/I(-)/SIGI(-) or F...
+ assert labels[3].startswith("SIG")
+ if labels[0].startswith("F"):
+ do_mask(labels[0], labels[:2]) # bad F+
+ do_mask(labels[2], labels[2:4]) # bad F-
+ do_mask(labels[1], labels[:2]) # bad sigma+
+ do_mask(labels[3], labels[2:4]) # bad sigma-
+ # mask_invalid_obs_values()
+
  def remove_systematic_absences(self):
  is_absent = self.sg.operations().systematic_absences(self.miller_array())
  n_absent = numpy.sum(is_absent)
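Note: the new mask_invalid_obs_values turns non-positive observations or sigmas into NaN instead of dropping rows, then masks the whole observation group whenever any member is non-finite, so an amplitude never survives without its sigma (and vice versa). A small pandas illustration of the same masking pattern (not the class itself):

    # Illustration of the masking pattern used above, on an F/SIGF pair:
    # non-positive values become NaN, and if either member of the pair is
    # non-finite the whole pair is masked.
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"FP": [10.0, -1.0, 5.0], "SIGFP": [1.0, 0.5, 0.0]})
    for lab in ("FP", "SIGFP"):
        df.loc[df[lab] <= 0, ["FP", "SIGFP"]] = np.nan
    df.loc[(~np.isfinite(df[["FP", "SIGFP"]])).any(axis=1), ["FP", "SIGFP"]] = np.nan
    print(df)  # rows 1 and 2 end up fully NaN; row 0 is untouched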
@@ -501,12 +526,22 @@ class HklData:
  self.df = self.df[~is_absent]
  # remove_systematic_absences()
 
- def merge_anomalous(self, labs, newlabs):
+ def merge_anomalous(self, labs, newlabs, method="weighted"):
+ assert method in ("weighted", "simple")
  assert len(labs) == 4 # i+,sigi+,i-,sigi- for example
  assert len(newlabs) == 2
- # skipna=True is default, so missing value is handled nicely.
- self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
- self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
+ if method == "simple":
+ # skipna=True is default, so missing value is handled nicely.
+ self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
+ self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
+ else:
+ obs = self.df[[labs[0], labs[2]]].to_numpy()
+ weights = 1. / self.df[[labs[1], labs[3]]].to_numpy()**2
+ sum_w = numpy.nansum(weights, axis=1)
+ sum_w[sum_w == 0] = numpy.nan # mask when both are nan
+ self.df[newlabs[0]] = numpy.nansum(obs * weights, axis=1) / sum_w
+ self.df[newlabs[1]] = numpy.sqrt(1. / sum_w)
+ # merge_anomalous()
 
  def as_asu_data(self, label=None, data=None, label_sigma=None):
  if label is None: assert data is not None
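Note: the new default method="weighted" is the standard inverse-variance merge of the two anomalous observations; written out, with I± and σ± standing for the Friedel pair:

    w_{\pm} = \frac{1}{\sigma_{\pm}^{2}}, \qquad
    I_{\mathrm{mean}} = \frac{w_{+} I_{+} + w_{-} I_{-}}{w_{+} + w_{-}}, \qquad
    \sigma_{\mathrm{mean}} = \frac{1}{\sqrt{w_{+} + w_{-}}}

When only one member of the pair is present, its weight alone survives the nansum; the previous behaviour (plain mean of the values, RMS of the sigmas) remains available as method="simple".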
servalcat/utils/logger.py CHANGED
@@ -37,7 +37,7 @@ class Logger(object):
  def write(self, l, end="", flush=True, fs=None, print_fs=sys.stdout):
  if self.stopped: return
  if self.prefix:
- l = "\n".join(self.prefix + x for x in l.splitlines(keepends=True))
+ l = "".join(self.prefix + x for x in l.splitlines(keepends=True))
  print(l, end=end, file=print_fs, flush=flush)
  for f in (self.ofs, fs):
  if f is not None:
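Note: this one-character change fixes doubled line breaks when a prefix is set, since splitlines(keepends=True) already keeps each line's trailing newline, so the pieces must be joined with an empty string rather than another "\n". For example:

    # Why the join separator changed: keepends=True already preserves "\n".
    text = "a\nb\n"
    prefix = "  "
    bad = "\n".join(prefix + x for x in text.splitlines(keepends=True))   # '  a\n\n  b\n'
    good = "".join(prefix + x for x in text.splitlines(keepends=True))    # '  a\n  b\n'
    print(repr(bad), repr(good))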
servalcat/utils/model.py CHANGED
@@ -73,11 +73,12 @@ def remove_charge(sts):
  def check_atomsf(sts, source, mott_bethe=True):
  assert source in ("xray", "electron", "neutron")
  if source != "electron": mott_bethe = False
- logger.writeln("Atomic scattering factors for {}".format("electron (Mott-Bethe)" if mott_bethe else source))
+ logger.writeln("Atomic scattering factors for {}".format("xray (use Mott-Bethe to convert to electrons)" if mott_bethe else source))
  if source != "xray" and not mott_bethe:
  logger.writeln(" Note that charges will be ignored")
  el_charges = {(cra.atom.element, cra.atom.charge) for st in sts for cra in st[0].all()}
  elems = {x[0] for x in el_charges}
+ tmp = {}
  if source == "xray" or mott_bethe:
  shown = set()
  for el, charge in sorted(el_charges, key=lambda x: (x[0].atomic_number, x[1])):
@@ -88,12 +89,16 @@ def check_atomsf(sts, source, mott_bethe=True):
  charge = 0
  if (el, charge) in shown: continue
  label = el.name if charge == 0 else "{}{:+}".format(el.name, charge)
- logger.writeln(" {} {}".format(label, tuple(sf.get_coefs())))
  shown.add((el, charge))
+ tmp[label] = {**{f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(sf, k))}, "c": sf.c}
  else:
  for el in sorted(elems, key=lambda x: x.atomic_number):
- sf = el.c4322 if source == "electron" else el.neutron92
- logger.writeln(" {} {}".format(el.name, tuple(sf.get_coefs())))
+ if source == "electron":
+ tmp[el.name] = {f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(el.c4322, k))}
+ else:
+ tmp[el.name] = {"a": el.neutron92.get_coefs()[0]}
+ with logger.with_prefix(" "):
+ logger.writeln(pandas.DataFrame(tmp).T.to_string())
  logger.writeln("")
  # check_atomsf()
 
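Note: instead of printing one tuple per element, the coefficients are now collected into a dict of dicts and rendered as a table through pandas. A sketch of that output pattern, using made-up placeholder numbers rather than real scattering-factor coefficients:

    # Output-formatting sketch only; the numbers are placeholders, not real
    # scattering-factor coefficients.
    import pandas as pd

    tmp = {
        "C":   {"a1": 1.0, "a2": 2.0, "b1": 3.0, "b2": 4.0, "c": 0.1},
        "O-1": {"a1": 1.5, "a2": 2.5, "b1": 3.5, "b2": 4.5, "c": 0.2},
    }
    # Transpose so elements become rows and coefficients become columns.
    print(pd.DataFrame(tmp).T.to_string())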
@@ -553,7 +558,9 @@ def reset_adp(model, bfactor=None, adp_mode="iso"):
  if adp_mode == "iso" or (adp_mode == "fix" and bfactor is not None):
  cra.atom.aniso = gemmi.SMat33f(0,0,0,0,0,0)
  elif adp_mode == "aniso":
- if not cra.atom.aniso.nonzero() or bfactor is not None:
+ if cra.atom.aniso.nonzero() and bfactor is None: # just in case
+ cra.atom.b_iso = numpy.mean(cra.atom.aniso.calculate_eigenvalues()) * u_to_b
+ else:
  u = cra.atom.b_iso * b_to_u
  cra.atom.aniso = gemmi.SMat33f(u, u, u, 0, 0, 0)
  # reset_adp()
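Note: for adp_mode="aniso" an existing anisotropic tensor is now kept and b_iso is re-derived from it, rather than the tensor being overwritten from b_iso. The added line is the standard equivalent-isotropic conversion, assuming u_to_b is the usual 8π² factor (an assumption about the constant, not stated in the diff):

    U_{\mathrm{eq}} = \tfrac{1}{3}(\lambda_1 + \lambda_2 + \lambda_3) = \tfrac{1}{3}\operatorname{Tr}(\mathbf{U}),
    \qquad B_{\mathrm{eq}} = 8\pi^{2}\, U_{\mathrm{eq}}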
servalcat/xtal/sigmaa.py CHANGED
@@ -1056,10 +1056,7 @@ def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections
  S = hkldata.df["S"].to_numpy()[cidxes]
  f, m_proxy = expected_F_from_int(Io[cidxes], sigIo[cidxes], k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
  exp_ip = numpy.exp(numpy.angle(DFc[cidxes])*1j)
- if c == 0:
- hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
- else:
- hkldata.df.loc[cidxes, "FWT"] = f * exp_ip
+ hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
  hkldata.df.loc[cidxes, "DELFWT"] = f * exp_ip - DFc[cidxes]
  hkldata.df.loc[cidxes, "FOM"] = m_proxy
  if has_ano:
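Note: reading the map coefficients directly off the remaining lines, the centric special case is dropped and every reflection now gets the same weighted form (the amplitude-based path in calculate_maps further down receives the matching change):

    F_{\mathrm{WT}} = 2\,\langle F\rangle\, e^{i\varphi_{\mathrm{calc}}} - D F_{\mathrm{calc}},
    \qquad
    \Delta F_{\mathrm{WT}} = \langle F\rangle\, e^{i\varphi_{\mathrm{calc}}} - D F_{\mathrm{calc}}

where ⟨F⟩ is f here (the expected amplitude given the intensity data), m·|Fo| in calculate_maps, and D F_calc denotes the complex D-weighted calculated structure factor whose phase supplies φ_calc.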
@@ -1167,7 +1164,6 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
  allow_unusual_occupancies=False, space_group=None):
  if labin: assert 1 < len(labin) < 6
  assert use in ("all", "work", "test")
- assert n_bins or n_per_bin #if n_bins not set, n_per_bin should be given
 
  if len(xyzins) > 0 and type(xyzins[0]) is gemmi.Structure:
  sts = xyzins
@@ -1205,6 +1201,7 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
  logger.writeln("Observation type: {}".format(name))
  if len(newlabels) < len(labin): newlabels.append("FREE")
  hkldata = utils.hkl.hkldata_from_mtz(mtz, labin, newlabels=newlabels, require_types=require_types)
+ hkldata.mask_invalid_obs_values(newlabels)
  if newlabels[0] == "F(+)":
  hkldata.merge_anomalous(newlabels[:4], ["FP", "SIGFP"])
  newlabels = ["FP", "SIGFP"] + newlabels[4:]
@@ -1256,9 +1253,6 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
 
  if sg_use is not None:
  hkldata.sg = sg_use
- if newlabels[0] == "FP":
- hkldata.remove_nonpositive(newlabels[0])
- hkldata.remove_nonpositive(newlabels[1])
  hkldata.switch_to_asu()
  hkldata.remove_systematic_absences()
  #hkldata.df = hkldata.df.astype({name: 'float64' for name in ["I","SIGI","FP","SIGFP"] if name in hkldata.df})
@@ -1280,6 +1274,17 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
  free = hkldata.guess_free_number(newlabels[0])
 
  if n_bins is None:
+ if n_per_bin is None:
+ if use == "all" or "FREE" not in hkldata.df:
+ n_per_bin = 100
+ use = "all"
+ elif use == "work":
+ n_per_bin = 100
+ elif use == "test":
+ n_per_bin = 50
+ else:
+ raise RuntimeError(f"should not happen: {use=}")
+
  sel = hkldata.df[newlabels[0]].notna()
  if use == "work":
  sel &= hkldata.df.FREE != free
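Note: the per-bin reflection count now defaults inside process_input (100 reflections per bin when estimating over all or work reflections, 50 when only the smaller test set is used, with max_bins=30 still passed by the callers). As a rough sketch of the resulting bin count, an approximation and not the exact servalcat binning rule:

    # Rough sketch only — not the exact binning formula used downstream.
    def approx_n_bins(n_selected, n_per_bin, max_bins=30):
        return max(1, min(max_bins, n_selected // n_per_bin))

    print(approx_n_bins(12000, 100))  # 30 (capped by max_bins)
    print(approx_n_bins(1200, 50))    # 24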
@@ -1473,13 +1478,11 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
  Sigma = 2 * SigFo**2 + epsilon * S
  X = 2 * Fo * DFc_abs / Sigma
  m = gemmi.bessel_i1_over_i0(X)
- hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
  else:
  Sigma = SigFo**2 + epsilon * S
  X = Fo * DFc_abs / Sigma
  m = numpy.tanh(X)
- hkldata.df.loc[cidxes, "FWT"] = (m * Fo) * expip
-
+ hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
  hkldata.df.loc[cidxes, "DELFWT"] = (m * Fo - DFc_abs) * expip
  hkldata.df.loc[cidxes, "FOM"] = m
  hkldata.df.loc[cidxes, "X"] = X
@@ -1500,7 +1503,8 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
  Fc = hkldata.df.FC.to_numpy()[idxes] * k_ani[idxes]
  Fo = hkldata.df.FP.to_numpy()[idxes]
  mean_DFc2 = numpy.nanmean(numpy.abs((Ds[idxes,:] * Fcs[idxes,:]).sum(axis=1) * k_ani[idxes])**2)
- mean_log_DFcs = numpy.log(numpy.nanmean(numpy.abs(Ds[idxes,:] * Fcs[idxes,:] * k_ani[idxes,None]), axis=0)).tolist()
+ with numpy.errstate(divide="ignore"):
+ mean_log_DFcs = numpy.log(numpy.nanmean(numpy.abs(Ds[idxes,:] * Fcs[idxes,:] * k_ani[idxes,None]), axis=0)).tolist()
  mean_Ds = numpy.nanmean(Ds[idxes,:], axis=0).tolist()
  if sum(nrefs) > 0:
  r = numpy.nansum(numpy.abs(numpy.abs(Fc)-Fo)) / numpy.nansum(Fo)
@@ -1535,7 +1539,6 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
  # calculate_maps()
 
  def main(args):
- n_per_bin = {"all": 500, "work": 500, "test": 50}[args.use]
  try:
  hkldata, sts, fc_labs, centric_and_selections,free = process_input(hklin=args.hklin,
  labin=args.labin.split(",") if args.labin else None,
@@ -1545,7 +1548,6 @@ def main(args):
  source=args.source,
  d_max=args.d_max,
  d_min=args.d_min,
- n_per_bin=n_per_bin,
  use=args.use,
  max_bins=30,
  keep_charges=args.keep_charges,
@@ -1554,7 +1556,7 @@ def main(args):
  raise SystemExit("Error: {}".format(e))
 
  if args.twin:
- twin_data = find_twin_domains_from_data(hkldata)
+ twin_data, _ = find_twin_domains_from_data(hkldata)
  else:
  twin_data = None
  if twin_data:
servalcat/xtal/twin.py CHANGED
@@ -21,39 +21,43 @@ def find_twin_domains_from_data(hkldata, max_oblique=5, min_alpha=0.05):
  #for op in ops:
  # logger.writeln(f" {op.triplet()}")
  if not ops:
- return
+ logger.writeln("")
+ return None, None
  twin_data = ext.TwinData()
  twin_data.setup(hkldata.miller_array(), hkldata.df.bin, hkldata.sg, hkldata.cell, ops)
  if "I" in hkldata.df:
  Io = hkldata.df.I.to_numpy()
  else:
  Io = hkldata.df.FP.to_numpy()**2
- alphas = []
  ccs, nums = [], []
+ tmp = []
  for i_bin, bin_idxes in hkldata.binned():
  ratios = [1.]
  ccs.append([])
  nums.append([])
+ rs = []
  for i_op, op in enumerate(ops):
  ii = numpy.array(twin_data.pairs(i_op, i_bin))
  val = numpy.all(numpy.isfinite(Io[ii]), axis=1)
  if numpy.sum(val) == 0:
- cc = numpy.nan
+ cc = r = numpy.nan
  else:
  cc = numpy.corrcoef(Io[ii][val].T)[0,1]
- rr = (1 - numpy.sqrt(1 - cc**2)) / cc
- ratios.append(rr)
+ r = numpy.sum(numpy.abs(Io[ii][val, 0] - Io[ii][val, 1])) / numpy.sum(Io[ii][val])
+ ratio = (1 - numpy.sqrt(1 - cc**2)) / cc
+ ratios.append(ratio)
  ccs[-1].append(cc)
+ rs.append(r)
  nums[-1].append(len(val))
- alphas.append(numpy.array(ratios) / numpy.nansum(ratios))
- alphas = numpy.maximum(0, numpy.mean(alphas, axis=0))
- alphas /= numpy.nansum(alphas)
+ tmp.append(rs + ccs[-1] + nums[-1] + (numpy.array(ratios) / numpy.nansum(ratios)).tolist()[1:])
+ df = pandas.DataFrame(tmp, columns=[f"{n}_op{i+1}" for n in ("R", "CC", "num", "raw_est") for i in range(len(ops))])
+ with logger.with_prefix(" "):
+ logger.writeln(df.to_string(float_format="%.4f"))
  ccs = numpy.array(ccs)
  nums = numpy.array(nums)
  tmp = [{"Operator": gemmi.Op().triplet(),
  "R_twin_obs": 0,
- "CC_mean": 1,
- "Alpha_from_CC": alphas[0]}]
+ "CC_mean": 1}]
  for i_op, op in enumerate(ops):
  ii = numpy.array(twin_data.pairs(i_op))
  val = numpy.all(numpy.isfinite(Io[ii]), axis=1)
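Note: the per-bin table introduced above records, for each candidate operator, the R-factor and the correlation between twin-related observed intensities plus a raw fraction estimate. Reading that estimate directly off the code, with CC_k the correlation for operator k and the identity operator contributing a raw value of 1 before normalisation:

    \alpha^{\mathrm{raw}}_{k} = \frac{1 - \sqrt{1 - \mathrm{CC}_{k}^{2}}}{\mathrm{CC}_{k}},
    \qquad
    \alpha_{k} = \frac{\alpha^{\mathrm{raw}}_{k}}{\sum_{j} \alpha^{\mathrm{raw}}_{j}}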
@@ -61,61 +65,80 @@ def find_twin_domains_from_data(hkldata, max_oblique=5, min_alpha=0.05):
  r_obs = numpy.nan
  else:
  r_obs = numpy.sum(numpy.abs(Io[ii][val, 0] - Io[ii][val, 1])) / numpy.sum(Io[ii][val])
+ cc = numpy.sum(nums[:,i_op] * ccs[:,i_op]) / numpy.sum(nums[:,i_op])
  tmp.append({"Operator": op.triplet(),
- "CC_mean": numpy.sum(nums[:,i_op] * ccs[:,i_op]) / numpy.sum(nums[:,i_op]),
+ "CC_mean": cc,
  "R_twin_obs": r_obs,
- "Alpha_from_CC": alphas[i_op+1],
  })
  df = pandas.DataFrame(tmp)
- logger.writeln(df.to_string(float_format="%.2f"))
+ df["Alpha_from_CC"] = (1 - numpy.sqrt(1 - df["CC_mean"]**2)) / df["CC_mean"]
+ df["Alpha_from_CC"] /= numpy.nansum(df["Alpha_from_CC"])
+ logger.writeln("\n Initial twin fraction estimates:")
+ with logger.with_prefix(" "):
+ logger.writeln(df.to_string(float_format="%.2f"))
 
- sel_idxes = [i for i, a in enumerate(alphas) if i > 0 and a > min_alpha]
- if not sel_idxes:
- logger.writeln(" No twinning detected")
- return
+ sel = df["Alpha_from_CC"].to_numpy() > min_alpha
+ if sel[1:].sum() == 0:
+ logger.writeln(" No twinning detected\n")
+ return None, None
 
- if len(sel_idxes) + 1 != len(alphas):
- ops = [ops[i-1] for i in sel_idxes]
- logger.writeln(" Twin operators after filtering small fractions")
- alphas = numpy.array([alphas[0]] + [alphas[i] for i in sel_idxes])
- alphas /= numpy.sum(alphas)
- df = pandas.DataFrame({"Operator": [x.triplet() for x in [gemmi.Op()]+ops],
- "Alpha": alphas})
- logger.writeln(df.to_string(float_format="%.2f"))
+ if not sel.all():
+ ops = [ops[i] for i in range(len(ops)) if sel[i+1]]
+ logger.writeln(f"\n Twin operators after filtering small fractions (<= {min_alpha})")
+ df = df[sel]
+ df["Alpha_from_CC"] /= numpy.nansum(df["Alpha_from_CC"])
+ with logger.with_prefix(" "):
+ logger.writeln(df.to_string(float_format="%.2f"))
  twin_data = ext.TwinData()
  twin_data.setup(hkldata.miller_array(), hkldata.df.bin, hkldata.sg, hkldata.cell, ops)
- twin_data.alphas = alphas
+ twin_data.alphas = df["Alpha_from_CC"].tolist()
  if "I" not in hkldata.df:
  logger.writeln('Generating "observed" intensities for twin refinement: Io = Fo**2, SigIo = 2*F*SigFo')
  hkldata.df["I"] = hkldata.df.FP**2
  hkldata.df["SIGI"] = 2 * hkldata.df.FP * hkldata.df.SIGFP
- return twin_data
+ logger.writeln("")
+ return twin_data, df
 
  # find_twin_domains_from_data()
 
  def estimate_twin_fractions_from_model(twin_data, hkldata):
  logger.writeln("Estimating twin fractions")
  Ic = numpy.abs(twin_data.f_calc.sum(axis=1))**2
- Ic_all = Ic[twin_data.twin_related(hkldata.sg)]
+ idx_all = twin_data.twin_related(hkldata.sg)
+ Ic_all = Ic[idx_all]
+ Ic_all[(idx_all < 0).any(axis=1)] = numpy.nan
  rr = twin_data.obs_related_asu()
  tmp = []
+ P_list, cc_oc_list, weight_list = [], [], []
+ n_ops = len(twin_data.ops) + 1
+ tidxes = numpy.triu_indices(n_ops, 1)
  for i_bin, bin_idxes in hkldata.binned():
- cc_o_c = []
  i_tmp = Ic_all[numpy.asarray(twin_data.bin)==i_bin,:]
+ i_tmp = i_tmp[numpy.isfinite(i_tmp).all(axis=1)]
  P = numpy.corrcoef(i_tmp.T)
  iobs = hkldata.df.I.to_numpy()[bin_idxes]
  ic_bin = Ic[rr[bin_idxes,:]]
- val = numpy.isfinite(iobs) & numpy.isfinite(ic_bin).all(axis=1)
+ val = numpy.isfinite(iobs) & numpy.isfinite(ic_bin).all(axis=1) & numpy.all(rr[bin_idxes,:]>=0, axis=1)
  iobs, ic_bin = iobs[val], ic_bin[val,:]
- cc_o_c = [numpy.corrcoef(iobs, ic_bin[:,i])[0,1] for i in range(len(twin_data.ops)+1)]
- frac_est = numpy.dot(numpy.linalg.pinv(P), cc_o_c)
- tmp.append(frac_est.tolist())
-
- df = pandas.DataFrame(tmp)
- df.iloc[:,:] /= df.sum(axis=1).to_numpy()[:,None]
- mean_alphas = numpy.maximum(0, df.mean())
- mean_alphas /= numpy.sum(mean_alphas)
- logger.write(" Estimated fractions from data-model correlations: ")
- logger.writeln(" ".join("%.2f"%x for x in mean_alphas))
- twin_data.alphas = mean_alphas
+ cc_oc = [numpy.corrcoef(iobs, ic_bin[:,i])[0,1] for i in range(n_ops)]
+ P_list.append(P)
+ cc_oc_list.append(cc_oc)
+ weight_list.append(numpy.sum(val))
+ frac_est = numpy.dot(numpy.linalg.pinv(P), cc_oc)
+ frac_est /= frac_est.sum()
+ tmp.append(P[tidxes].tolist() + cc_oc + [weight_list[-1]] + frac_est.tolist())
 
+ P = numpy.average(P_list, axis=0, weights=weight_list)
+ cc_oc = numpy.average(cc_oc_list, axis=0, weights=weight_list)
+ frac_est = numpy.dot(numpy.linalg.pinv(P), cc_oc)
+ frac_est = numpy.maximum(0, frac_est)
+ frac_est /= frac_est.sum()
+ df = pandas.DataFrame(tmp, columns=[f"cc_{i+1}_{j+1}" for i, j in zip(*tidxes)] +
+ [f"cc_o_{i+1}" for i in range(n_ops)] +
+ ["nref"] + [f"raw_est_{i+1}" for i in range(n_ops)])
+ with logger.with_prefix(" "):
+ logger.writeln(df.to_string(float_format="%.4f"))
+ logger.write(" Final twin fraction estimate: ")
+ logger.writeln(" ".join("%.2f"%x for x in frac_est))
+ twin_data.alphas = frac_est
+ return df
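Note: the rewritten estimate_twin_fractions_from_model accumulates, per resolution bin, the correlation matrix P between twin-related calculated intensities and the correlations cc between observed and each calculated component, averages both across bins weighted by reflection count, and solves for the fractions with a pseudoinverse. In effect the code models the observed/calculated correlations as a fraction-weighted mixture:

    \mathrm{cc}_{i} = \mathrm{CC}(I_{\mathrm{obs}}, I_{\mathrm{calc},i}) \approx \sum_{j} P_{ij}\,\alpha_{j},
    \quad P_{ij} = \mathrm{CC}(I_{\mathrm{calc},i}, I_{\mathrm{calc},j})
    \;\Rightarrow\;
    \boldsymbol{\alpha} = \max\!\bigl(0,\ P^{+}\mathbf{cc}\bigr),
    \qquad \boldsymbol{\alpha} \leftarrow \boldsymbol{\alpha} \big/ \textstyle\sum_{i}\alpha_{i}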
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: servalcat
- Version: 0.4.99
+ Version: 0.4.100
  Summary: Structure refinement and validation for crystallography and single particle analysis
  Author: Keitaro Yamashita, Garib N. Murshudov
  License: MPL-2.0
@@ -1,14 +1,14 @@
- servalcat/__init__.py,sha256=HJt3p1jXzt-hnlShh1LfxHF3WRNM7hyk1FY-B4EqExc,241
+ servalcat/__init__.py,sha256=bOvfsOFrPkgPBtZIILjkf4hYnPjPRDQYcfbDpTzod3k,242
  servalcat/__main__.py,sha256=XUM193aDwZAEQY02VzvZasAzD6AYEM-_A4wqy93KDWE,4190
- servalcat/ext.cp38-win_amd64.pyd,sha256=b3XA-uj4P_eMgKwSrIG3jDYCQU49xOMqosymXrWmaFs,1334784
+ servalcat/ext.cp38-win_amd64.pyd,sha256=LH6re0zSkTkpDdMcS5ibAhBJ49FUTTK6qV6CKWEnumY,1335296
  servalcat/refine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  servalcat/refine/cgsolve.py,sha256=Xz7EwGI-mr4nnbfARyfFjAqbJbPWGH9qNWQ0GpltDuc,3172
  servalcat/refine/refine.py,sha256=P6epHQZLpz3Tl2fuGrdCp6WOxuzvik7YJJTBsd4zuHg,44941
  servalcat/refine/refine_geom.py,sha256=RMfhmBb-jlSFZoMhV3QONev9U2ZuHuPyjSslhECMYXo,11585
- servalcat/refine/refine_spa.py,sha256=BFoJPNhyLVUWl2U2aDufkRJheYmzhnP4iEdE9DZ5JWc,19528
- servalcat/refine/refine_xtal.py,sha256=kI6oqE2u8E82KUqk6zg4Y9JVLC2PIE9LG227rXWlYM8,15017
+ servalcat/refine/refine_spa.py,sha256=tc1ZW-jJoIga5tfYA3DTLFg6QsgiN9xc_cDQKJQgtcc,19687
+ servalcat/refine/refine_xtal.py,sha256=5qrLrUawTRuXVGoaBNpZeTJiHB6jYrJeNnARNFe9kvQ,14841
  servalcat/refine/spa.py,sha256=7aYSnSty0fSe46P_RQ79Bd_8RwD88k3uAaLHVsz6yHc,6616
- servalcat/refine/xtal.py,sha256=qusNpwRdBie9pnHol_NTDgou08reuiERWBujyhREFbc,14893
+ servalcat/refine/xtal.py,sha256=2GUcJZhY35CYr3P6StpjEnPdgBtLWG10mAuN-X4cXy4,14911
  servalcat/refmac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  servalcat/refmac/exte.py,sha256=LRPej2yCZ4RoWJnEQldILEiUED6S_w1lgutWAdhu9_o,9201
  servalcat/refmac/refmac_keywords.py,sha256=WPLDQdk91i5yKZ0QcfaTQ6qL-_qBv7_91eUv4Is9j54,27288
@@ -26,20 +26,20 @@ servalcat/utils/__init__.py,sha256=4umrD2fhG1QAQppDBQM95-6y8QYbtxr7Mi_-Nkc3XOA,1
  servalcat/utils/commands.py,sha256=fEWF6HesIAuxnzbaKAPws_QaIPxJUQc6DVthHeex_Vk,72503
  servalcat/utils/fileio.py,sha256=3fyHTQ62sibP3cUYtX8D9G7cC6qelkfPKnbA47MqPOo,30246
  servalcat/utils/generate_operators.py,sha256=fCrcTcciRn-6fpFtHYvf1oPXAObjuqR3WQejz5d0RrA,10980
- servalcat/utils/hkl.py,sha256=sMaXJprnceVLDHWfwPSTWmbVbAJEPfj86tF3C0S2g_o,28776
- servalcat/utils/logger.py,sha256=VXSkn54Rp2dxnCQ-Smb5T0NZqysvw8GGb4bKyA8Vuig,4729
+ servalcat/utils/hkl.py,sha256=pciLeh7dPxuF0-gmKRAI0h6zhYCA7QWH91LEBLzAHOA,30536
+ servalcat/utils/logger.py,sha256=25uXUw8xCjvd9bnzIQ0Nq1C4MNIqEXSG-xScEnXGmLM,4727
  servalcat/utils/maps.py,sha256=1Gm54nEvPj4peCuo0Rx-8_gvRD1pr2qECq7U9N6mrVw,13570
- servalcat/utils/model.py,sha256=YPqYNCPZrbtUPbtMROQUA1RowEC7zrBlUvAEFNfoW5k,30959
+ servalcat/utils/model.py,sha256=foNzqFxN306Ir40dpU4Djxx0sZ7Gbh0ZIJj-e3ifTPg,31345
  servalcat/utils/refmac.py,sha256=dY92UZo2orgZJOEmPacCek8PMj-ydCp6-pR0rdsAVG8,31682
  servalcat/utils/restraints.py,sha256=hclAJ3CKzIqeyC-I0rH8TSwikwQN1k8dUtYWKC0OAnU,37741
  servalcat/utils/symmetry.py,sha256=PSSD-S_j_t1m4E9F8Fd9RJqTWyKf92zLCjSED7iXFkU,12164
  servalcat/xtal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  servalcat/xtal/french_wilson.py,sha256=u4wzHuotO8-avPkhuEylg9WmMZYdGHYz1IvG07uqW3M,12045
  servalcat/xtal/run_refmac_small.py,sha256=_Rw7wz7_yAlQgDqjJ_njeK6ZqN_r7KAakoor9WeNhxI,10634
- servalcat/xtal/sigmaa.py,sha256=uF552XnFyaBKfN36c8ajMOxFGrW_r3p9FFQ4T5nB1wg,76970
- servalcat/xtal/twin.py,sha256=gsDyyeC5jxsY8wepcB9lnNNPgVCvqlGVj9_kQZkkdWg,5021
- servalcat-0.4.99.dist-info/METADATA,sha256=OxT6LeODHswv4-Yg1Xtv6pjkvHjriRcZNFfq5asJObs,2741
- servalcat-0.4.99.dist-info/WHEEL,sha256=kwS-TzWMc793MQ545g1AsDhyMUJrOZEAGChkok6pUJs,104
- servalcat-0.4.99.dist-info/entry_points.txt,sha256=G1mDxhOCdF3umYz4k0kfwJbSdYSKqhvQdGCmrP8FRAY,111
- servalcat-0.4.99.dist-info/licenses/LICENSE,sha256=JoTeFzAOCkNGhvHsf4r2BFIHpLRXo_4EsrnOZV58XVA,17098
- servalcat-0.4.99.dist-info/RECORD,,
+ servalcat/xtal/sigmaa.py,sha256=c2WuFu5cOew-TTJyYkJDaIykbRqFuKddGqTQblWfH6U,76904
+ servalcat/xtal/twin.py,sha256=Z1WY3K61SFE4EcCo3C2lUxByhx68nkZMlruUNr5jimc,6325
+ servalcat-0.4.100.dist-info/METADATA,sha256=fY8VFfVql5PFlkJdbmAFaPOH9WVm8MJ7oKedUyyL9ys,2742
+ servalcat-0.4.100.dist-info/WHEEL,sha256=kwS-TzWMc793MQ545g1AsDhyMUJrOZEAGChkok6pUJs,104
+ servalcat-0.4.100.dist-info/entry_points.txt,sha256=G1mDxhOCdF3umYz4k0kfwJbSdYSKqhvQdGCmrP8FRAY,111
+ servalcat-0.4.100.dist-info/licenses/LICENSE,sha256=JoTeFzAOCkNGhvHsf4r2BFIHpLRXo_4EsrnOZV58XVA,17098
+ servalcat-0.4.100.dist-info/RECORD,,