servalcat 0.4.88__cp313-cp313-macosx_11_0_arm64.whl → 0.4.100__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

servalcat/xtal/sigmaa.py CHANGED
@@ -30,6 +30,8 @@ def add_arguments(parser):
30
30
  parser.description = 'Sigma-A parameter estimation for crystallographic data'
31
31
  parser.add_argument('--hklin', required=True,
32
32
  help='Input MTZ file')
33
+ parser.add_argument('--spacegroup',
34
+ help='Override space group')
33
35
  parser.add_argument('--labin',
34
36
  help='MTZ column for F,SIGF,FREE')
35
37
  parser.add_argument('--free', type=int,
@@ -77,7 +79,7 @@ def calc_r_and_cc(hkldata, centric_and_selections, twin_data=None):
77
79
  has_int = "I" in hkldata.df
78
80
  has_free = "FREE" in hkldata.df
79
81
  stats = hkldata.binned_df.copy()
80
- stats["n_obs"] = 0
82
+ stats[["n_obs", "n_all"]] = 0
81
83
  if has_free:
82
84
  stats[["n_work", "n_free"]] = 0
83
85
  rlab = "R1" if has_int else "R"
@@ -105,6 +107,7 @@ def calc_r_and_cc(hkldata, centric_and_selections, twin_data=None):
105
107
 
106
108
  for i_bin, idxes in hkldata.binned():
107
109
  stats.loc[i_bin, "n_obs"] = numpy.sum(numpy.isfinite(obs[idxes]))
110
+ stats.loc[i_bin, "n_all"] = len(idxes)
108
111
  if has_free:
109
112
  for j, suf in ((1, "work"), (2, "free")):
110
113
  idxes2 = numpy.concatenate([sel[j] for sel in centric_and_selections[i_bin]])
@@ -1018,10 +1021,9 @@ def smooth_params(hkldata, D_labs, smoothing): # XXX twin_data
1018
1021
  # smooth_params()
1019
1022
 
1020
1023
  def expected_F_from_int(Io, sigIo, k_ani, DFc, eps, c, S):
1021
- if c == 0: # acentric
1022
- k_num, k_den = 0.5, 0.
1023
- else:
1024
- k_num, k_den = 0., -0.5
1024
+ k_num = numpy.repeat(0.5 if c == 0 else 0., Io.size) # 0.5 if acentric
1025
+ k_den = k_num - 0.5
1026
+ if numpy.isscalar(c): c = numpy.repeat(c, Io.size)
1025
1027
  to = Io / sigIo - sigIo / (c+1) / k_ani**2 / S / eps
1026
1028
  tf = k_ani * numpy.abs(DFc) / numpy.sqrt(sigIo)
1027
1029
  sig1 = k_ani**2 * S * eps / sigIo
@@ -1054,10 +1056,7 @@ def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections
1054
1056
  S = hkldata.df["S"].to_numpy()[cidxes]
1055
1057
  f, m_proxy = expected_F_from_int(Io[cidxes], sigIo[cidxes], k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
1056
1058
  exp_ip = numpy.exp(numpy.angle(DFc[cidxes])*1j)
1057
- if c == 0:
1058
- hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
1059
- else:
1060
- hkldata.df.loc[cidxes, "FWT"] = f * exp_ip
1059
+ hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
1061
1060
  hkldata.df.loc[cidxes, "DELFWT"] = f * exp_ip - DFc[cidxes]
1062
1061
  hkldata.df.loc[cidxes, "FOM"] = m_proxy
1063
1062
  if has_ano:
@@ -1088,15 +1087,23 @@ def calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_an
1088
1087
  Io[tohide] = numpy.nan
1089
1088
 
1090
1089
  twin_data.est_f_true(Io, sigIo)
1091
- F_true = numpy.asarray(twin_data.f_true_max)
1092
1090
  Ds = twin_data.ml_scale_array()
1093
1091
  DFc = (twin_data.f_calc * Ds).sum(axis=1)
1094
1092
  exp_ip = numpy.exp(numpy.angle(DFc)*1j)
1095
1093
  Ft = numpy.asarray(twin_data.f_true_max)
1096
1094
  m = twin_data.calc_fom()
1097
- fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
1098
- 2 * m * Ft * exp_ip - DFc, m * Ft * exp_ip)
1099
- delfwt = m * Ft * exp_ip - DFc
1095
+ Fexp = twin_data.expected_F(Io, sigIo)
1096
+ if 1:
1097
+ fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
1098
+ 2 * m * Ft * exp_ip - DFc,
1099
+ m * Ft * exp_ip)
1100
+ delfwt = m * Ft * exp_ip - DFc
1101
+ else: # based on "more accurate" evaluation of <m|F|>
1102
+ fwt = numpy.where(numpy.asarray(twin_data.centric) == 0,
1103
+ 2 * Fexp * exp_ip - DFc,
1104
+ m * Fexp * exp_ip)
1105
+ delfwt = Fexp * exp_ip - DFc
1106
+
1100
1107
  sel = numpy.isnan(fwt)
1101
1108
  fwt[sel] = DFc[sel]
1102
1109
 
@@ -1105,7 +1112,8 @@ def calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_an
1105
1112
  hkldata2.df["FWT"] = fwt
1106
1113
  hkldata2.df["DELFWT"] = delfwt
1107
1114
  hkldata2.df["FOM"] = m
1108
- hkldata2.df["F_est"] = F_true
1115
+ hkldata2.df["F_est"] = Ft
1116
+ hkldata2.df["F_exp"] = Fexp
1109
1117
  hkldata2.df["FC"] = twin_data.f_calc.sum(axis=1)
1110
1118
  hkldata2.df["DFC"] = DFc
1111
1119
  hkldata2.df[D_labs] = Ds
@@ -1116,7 +1124,7 @@ def calculate_maps_twin(hkldata, b_aniso, fc_labs, D_labs, twin_data, centric_an
1116
1124
  def merge_models(sts): # simply merge models. no fix in chain ids etc.
1117
1125
  st2 = sts[0].clone()
1118
1126
  del st2[:]
1119
- model = gemmi.Model("1")
1127
+ model = gemmi.Model(1)
1120
1128
  for st in sts:
1121
1129
  for m in st:
1122
1130
  for c in m:
@@ -1153,10 +1161,9 @@ def decide_mtz_labels(mtz, find_free=True, require=None):
1153
1161
 
1154
1162
  def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=None,
1155
1163
  n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False,
1156
- allow_unusual_occupancies=False):
1164
+ allow_unusual_occupancies=False, space_group=None):
1157
1165
  if labin: assert 1 < len(labin) < 6
1158
1166
  assert use in ("all", "work", "test")
1159
- assert n_bins or n_per_bin #if n_bins not set, n_per_bin should be given
1160
1167
 
1161
1168
  if len(xyzins) > 0 and type(xyzins[0]) is gemmi.Structure:
1162
1169
  sts = xyzins
@@ -1194,6 +1201,7 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
1194
1201
  logger.writeln("Observation type: {}".format(name))
1195
1202
  if len(newlabels) < len(labin): newlabels.append("FREE")
1196
1203
  hkldata = utils.hkl.hkldata_from_mtz(mtz, labin, newlabels=newlabels, require_types=require_types)
1204
+ hkldata.mask_invalid_obs_values(newlabels)
1197
1205
  if newlabels[0] == "F(+)":
1198
1206
  hkldata.merge_anomalous(newlabels[:4], ["FP", "SIGFP"])
1199
1207
  newlabels = ["FP", "SIGFP"] + newlabels[4:]
@@ -1204,6 +1212,12 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
1204
1212
  if hkldata.df.empty:
1205
1213
  raise RuntimeError("No data in hkl data")
1206
1214
 
1215
+ if space_group is None:
1216
+ sg_use = None
1217
+ else:
1218
+ sg_use = gemmi.SpaceGroup(space_group)
1219
+ logger.writeln(f"Space group overridden by user. Using {sg_use.xhm()}")
1220
+
1207
1221
  if sts:
1208
1222
  assert source in ["electron", "xray", "neutron"]
1209
1223
  for st in sts:
@@ -1216,39 +1230,40 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
1216
1230
  for st in sts: st.cell = hkldata.cell # mtz cell is used in any case
1217
1231
 
1218
1232
  sg_st = sts[0].find_spacegroup() # may be None
1219
- sg_use = hkldata.sg
1220
- if hkldata.sg != sg_st:
1221
- if st.cell.is_crystal() and sg_st and sg_st.laue_str() != hkldata.sg.laue_str():
1222
- raise RuntimeError("Crystal symmetry mismatch between model and data")
1223
- logger.writeln("Warning: space group mismatch between model and mtz")
1224
- if sg_st and sg_st.laue_str() == hkldata.sg.laue_str():
1225
- logger.writeln(" using space group from model")
1226
- sg_use = sg_st
1227
- else:
1228
- logger.writeln(" using space group from mtz")
1229
- logger.writeln("")
1230
-
1233
+ if sg_use is None:
1234
+ sg_use = hkldata.sg
1235
+ if hkldata.sg != sg_st:
1236
+ if st.cell.is_crystal() and sg_st and sg_st.laue_str() != hkldata.sg.laue_str():
1237
+ raise RuntimeError("Crystal symmetry mismatch between model and data")
1238
+ logger.writeln("Warning: space group mismatch between model and mtz")
1239
+ if sg_st and sg_st.laue_str() == hkldata.sg.laue_str():
1240
+ logger.writeln(" using space group from model")
1241
+ sg_use = sg_st
1242
+ else:
1243
+ logger.writeln(" using space group from mtz")
1244
+ logger.writeln("")
1245
+
1231
1246
  for st in sts:
1232
1247
  st.spacegroup_hm = sg_use.xhm()
1233
1248
  st.setup_cell_images()
1234
- hkldata.sg = sg_use
1235
1249
 
1236
1250
  if not keep_charges:
1237
1251
  utils.model.remove_charge(sts)
1238
1252
  utils.model.check_atomsf(sts, source)
1239
1253
 
1240
- if newlabels[0] == "FP":
1241
- hkldata.remove_nonpositive(newlabels[0])
1242
- hkldata.remove_nonpositive(newlabels[1])
1254
+ if sg_use is not None:
1255
+ hkldata.sg = sg_use
1243
1256
  hkldata.switch_to_asu()
1244
1257
  hkldata.remove_systematic_absences()
1245
1258
  #hkldata.df = hkldata.df.astype({name: 'float64' for name in ["I","SIGI","FP","SIGFP"] if name in hkldata.df})
1246
-
1259
+ d_min_data = hkldata.d_min_max(newlabels)[0]
1260
+ if d_min is None and hkldata.d_min_max()[0] != d_min_data:
1261
+ d_min = d_min_data
1262
+ logger.writeln(f"Changing resolution to {d_min:.3f} A")
1247
1263
  if (d_min, d_max).count(None) != 2:
1248
1264
  hkldata = hkldata.copy(d_min=d_min, d_max=d_max)
1249
1265
  if hkldata.df.empty:
1250
1266
  raise RuntimeError("No data left in hkl data")
1251
- d_min, d_max = hkldata.d_min_max()
1252
1267
 
1253
1268
  hkldata.complete()
1254
1269
  hkldata.sort_by_resolution()
@@ -1259,6 +1274,17 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
1259
1274
  free = hkldata.guess_free_number(newlabels[0])
1260
1275
 
1261
1276
  if n_bins is None:
1277
+ if n_per_bin is None:
1278
+ if use == "all" or "FREE" not in hkldata.df:
1279
+ n_per_bin = 100
1280
+ use = "all"
1281
+ elif use == "work":
1282
+ n_per_bin = 100
1283
+ elif use == "test":
1284
+ n_per_bin = 50
1285
+ else:
1286
+ raise RuntimeError(f"should not happen: {use=}")
1287
+
1262
1288
  sel = hkldata.df[newlabels[0]].notna()
1263
1289
  if use == "work":
1264
1290
  sel &= hkldata.df.FREE != free
@@ -1452,13 +1478,11 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
1452
1478
  Sigma = 2 * SigFo**2 + epsilon * S
1453
1479
  X = 2 * Fo * DFc_abs / Sigma
1454
1480
  m = gemmi.bessel_i1_over_i0(X)
1455
- hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
1456
1481
  else:
1457
1482
  Sigma = SigFo**2 + epsilon * S
1458
1483
  X = Fo * DFc_abs / Sigma
1459
1484
  m = numpy.tanh(X)
1460
- hkldata.df.loc[cidxes, "FWT"] = (m * Fo) * expip
1461
-
1485
+ hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
1462
1486
  hkldata.df.loc[cidxes, "DELFWT"] = (m * Fo - DFc_abs) * expip
1463
1487
  hkldata.df.loc[cidxes, "FOM"] = m
1464
1488
  hkldata.df.loc[cidxes, "X"] = X
@@ -1479,7 +1503,8 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
1479
1503
  Fc = hkldata.df.FC.to_numpy()[idxes] * k_ani[idxes]
1480
1504
  Fo = hkldata.df.FP.to_numpy()[idxes]
1481
1505
  mean_DFc2 = numpy.nanmean(numpy.abs((Ds[idxes,:] * Fcs[idxes,:]).sum(axis=1) * k_ani[idxes])**2)
1482
- mean_log_DFcs = numpy.log(numpy.nanmean(numpy.abs(Ds[idxes,:] * Fcs[idxes,:] * k_ani[idxes,None]), axis=0)).tolist()
1506
+ with numpy.errstate(divide="ignore"):
1507
+ mean_log_DFcs = numpy.log(numpy.nanmean(numpy.abs(Ds[idxes,:] * Fcs[idxes,:] * k_ani[idxes,None]), axis=0)).tolist()
1483
1508
  mean_Ds = numpy.nanmean(Ds[idxes,:], axis=0).tolist()
1484
1509
  if sum(nrefs) > 0:
1485
1510
  r = numpy.nansum(numpy.abs(numpy.abs(Fc)-Fo)) / numpy.nansum(Fo)
@@ -1514,7 +1539,6 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
1514
1539
  # calculate_maps()
1515
1540
 
1516
1541
  def main(args):
1517
- n_per_bin = {"all": 500, "work": 500, "test": 50}[args.use]
1518
1542
  try:
1519
1543
  hkldata, sts, fc_labs, centric_and_selections,free = process_input(hklin=args.hklin,
1520
1544
  labin=args.labin.split(",") if args.labin else None,
@@ -1524,15 +1548,15 @@ def main(args):
1524
1548
  source=args.source,
1525
1549
  d_max=args.d_max,
1526
1550
  d_min=args.d_min,
1527
- n_per_bin=n_per_bin,
1528
1551
  use=args.use,
1529
1552
  max_bins=30,
1530
- keep_charges=args.keep_charges)
1553
+ keep_charges=args.keep_charges,
1554
+ space_group=args.spacegroup)
1531
1555
  except RuntimeError as e:
1532
1556
  raise SystemExit("Error: {}".format(e))
1533
1557
 
1534
1558
  if args.twin:
1535
- twin_data = find_twin_domains_from_data(hkldata)
1559
+ twin_data, _ = find_twin_domains_from_data(hkldata)
1536
1560
  else:
1537
1561
  twin_data = None
1538
1562
  if twin_data:
@@ -1597,7 +1621,7 @@ def main(args):
1597
1621
 
1598
1622
  # Write mtz file
1599
1623
  if twin_data:
1600
- labs = ["F_est"]
1624
+ labs = ["F_est", "F_exp"]
1601
1625
  elif is_int:
1602
1626
  labs = ["I", "SIGI"]
1603
1627
  else:
@@ -1613,7 +1637,7 @@ def main(args):
1613
1637
  labs.append("F_true_est")
1614
1638
  labs += D_labs + ["S"]
1615
1639
  mtz_out = args.output_prefix+".mtz"
1616
- hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q", "F_est": "F"})
1640
+ hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q", "F_est": "F", "F_exp": "F"})
1617
1641
  return hkldata
1618
1642
  # main()
1619
1643
  if __name__ == "__main__":
servalcat/xtal/twin.py CHANGED
@@ -21,95 +21,124 @@ def find_twin_domains_from_data(hkldata, max_oblique=5, min_alpha=0.05):
21
21
  #for op in ops:
22
22
  # logger.writeln(f" {op.triplet()}")
23
23
  if not ops:
24
- return
24
+ logger.writeln("")
25
+ return None, None
25
26
  twin_data = ext.TwinData()
26
- twin_data.setup(hkldata.miller_array().to_numpy(), hkldata.df.bin, hkldata.sg, hkldata.cell, ops)
27
+ twin_data.setup(hkldata.miller_array(), hkldata.df.bin, hkldata.sg, hkldata.cell, ops)
27
28
  if "I" in hkldata.df:
28
29
  Io = hkldata.df.I.to_numpy()
29
30
  else:
30
31
  Io = hkldata.df.FP.to_numpy()**2
31
- alphas = []
32
32
  ccs, nums = [], []
33
+ tmp = []
33
34
  for i_bin, bin_idxes in hkldata.binned():
34
35
  ratios = [1.]
35
36
  ccs.append([])
36
37
  nums.append([])
38
+ rs = []
37
39
  for i_op, op in enumerate(ops):
38
40
  ii = numpy.array(twin_data.pairs(i_op, i_bin))
39
41
  val = numpy.all(numpy.isfinite(Io[ii]), axis=1)
40
- cc = numpy.corrcoef(Io[ii][val].T)[0,1]
41
- rr = (1 - numpy.sqrt(1 - cc**2)) / cc
42
- ratios.append(rr)
42
+ if numpy.sum(val) == 0:
43
+ cc = r = numpy.nan
44
+ else:
45
+ cc = numpy.corrcoef(Io[ii][val].T)[0,1]
46
+ r = numpy.sum(numpy.abs(Io[ii][val, 0] - Io[ii][val, 1])) / numpy.sum(Io[ii][val])
47
+ ratio = (1 - numpy.sqrt(1 - cc**2)) / cc
48
+ ratios.append(ratio)
43
49
  ccs[-1].append(cc)
50
+ rs.append(r)
44
51
  nums[-1].append(len(val))
45
- alphas.append(numpy.array(ratios) / sum(ratios))
46
- alphas = numpy.maximum(0, numpy.mean(alphas, axis=0))
47
- alphas /= numpy.sum(alphas)
52
+ tmp.append(rs + ccs[-1] + nums[-1] + (numpy.array(ratios) / numpy.nansum(ratios)).tolist()[1:])
53
+ df = pandas.DataFrame(tmp, columns=[f"{n}_op{i+1}" for n in ("R", "CC", "num", "raw_est") for i in range(len(ops))])
54
+ with logger.with_prefix(" "):
55
+ logger.writeln(df.to_string(float_format="%.4f"))
48
56
  ccs = numpy.array(ccs)
49
57
  nums = numpy.array(nums)
50
58
  tmp = [{"Operator": gemmi.Op().triplet(),
51
59
  "R_twin_obs": 0,
52
- "CC_mean": 1,
53
- "Alpha_from_CC": alphas[0]}]
60
+ "CC_mean": 1}]
54
61
  for i_op, op in enumerate(ops):
55
62
  ii = numpy.array(twin_data.pairs(i_op))
56
63
  val = numpy.all(numpy.isfinite(Io[ii]), axis=1)
57
- r_obs = numpy.sum(numpy.abs(Io[ii][val, 0] - Io[ii][val, 1])) / numpy.sum(Io[ii][val])
64
+ if numpy.sum(val) == 0:
65
+ r_obs = numpy.nan
66
+ else:
67
+ r_obs = numpy.sum(numpy.abs(Io[ii][val, 0] - Io[ii][val, 1])) / numpy.sum(Io[ii][val])
68
+ cc = numpy.sum(nums[:,i_op] * ccs[:,i_op]) / numpy.sum(nums[:,i_op])
58
69
  tmp.append({"Operator": op.triplet(),
59
- "CC_mean": numpy.sum(nums[:,i_op] * ccs[:,i_op]) / numpy.sum(nums[:,i_op]),
70
+ "CC_mean": cc,
60
71
  "R_twin_obs": r_obs,
61
- "Alpha_from_CC": alphas[i_op+1],
62
72
  })
63
73
  df = pandas.DataFrame(tmp)
64
- logger.writeln(df.to_string(float_format="%.2f"))
74
+ df["Alpha_from_CC"] = (1 - numpy.sqrt(1 - df["CC_mean"]**2)) / df["CC_mean"]
75
+ df["Alpha_from_CC"] /= numpy.nansum(df["Alpha_from_CC"])
76
+ logger.writeln("\n Initial twin fraction estimates:")
77
+ with logger.with_prefix(" "):
78
+ logger.writeln(df.to_string(float_format="%.2f"))
65
79
 
66
- sel_idxes = [i for i, a in enumerate(alphas) if i > 0 and a > min_alpha]
67
- if not sel_idxes:
68
- logger.writeln(" No twinning detected")
69
- return
80
+ sel = df["Alpha_from_CC"].to_numpy() > min_alpha
81
+ if sel[1:].sum() == 0:
82
+ logger.writeln(" No twinning detected\n")
83
+ return None, None
70
84
 
71
- if len(sel_idxes) + 1 != len(alphas):
72
- ops = [ops[i-1] for i in sel_idxes]
73
- logger.writeln(" Twin operators after filtering small fractions")
74
- alphas = numpy.array([alphas[0]] + [alphas[i] for i in sel_idxes])
75
- alphas /= numpy.sum(alphas)
76
- df = pandas.DataFrame({"Operator": [x.triplet() for x in [gemmi.Op()]+ops],
77
- "Alpha": alphas})
78
- logger.writeln(df.to_string(float_format="%.2f"))
85
+ if not sel.all():
86
+ ops = [ops[i] for i in range(len(ops)) if sel[i+1]]
87
+ logger.writeln(f"\n Twin operators after filtering small fractions (<= {min_alpha})")
88
+ df = df[sel]
89
+ df["Alpha_from_CC"] /= numpy.nansum(df["Alpha_from_CC"])
90
+ with logger.with_prefix(" "):
91
+ logger.writeln(df.to_string(float_format="%.2f"))
79
92
  twin_data = ext.TwinData()
80
- twin_data.setup(hkldata.miller_array().to_numpy(), hkldata.df.bin, hkldata.sg, hkldata.cell, ops)
81
- twin_data.alphas = alphas
93
+ twin_data.setup(hkldata.miller_array(), hkldata.df.bin, hkldata.sg, hkldata.cell, ops)
94
+ twin_data.alphas = df["Alpha_from_CC"].tolist()
82
95
  if "I" not in hkldata.df:
83
96
  logger.writeln('Generating "observed" intensities for twin refinement: Io = Fo**2, SigIo = 2*F*SigFo')
84
97
  hkldata.df["I"] = hkldata.df.FP**2
85
98
  hkldata.df["SIGI"] = 2 * hkldata.df.FP * hkldata.df.SIGFP
86
- return twin_data
99
+ logger.writeln("")
100
+ return twin_data, df
87
101
 
88
102
  # find_twin_domains_from_data()
89
103
 
90
104
  def estimate_twin_fractions_from_model(twin_data, hkldata):
91
105
  logger.writeln("Estimating twin fractions")
92
106
  Ic = numpy.abs(twin_data.f_calc.sum(axis=1))**2
93
- Ic_all = Ic[twin_data.twin_related(hkldata.sg)]
107
+ idx_all = twin_data.twin_related(hkldata.sg)
108
+ Ic_all = Ic[idx_all]
109
+ Ic_all[(idx_all < 0).any(axis=1)] = numpy.nan
94
110
  rr = twin_data.obs_related_asu()
95
111
  tmp = []
112
+ P_list, cc_oc_list, weight_list = [], [], []
113
+ n_ops = len(twin_data.ops) + 1
114
+ tidxes = numpy.triu_indices(n_ops, 1)
96
115
  for i_bin, bin_idxes in hkldata.binned():
97
- cc_o_c = []
98
116
  i_tmp = Ic_all[numpy.asarray(twin_data.bin)==i_bin,:]
117
+ i_tmp = i_tmp[numpy.isfinite(i_tmp).all(axis=1)]
99
118
  P = numpy.corrcoef(i_tmp.T)
100
119
  iobs = hkldata.df.I.to_numpy()[bin_idxes]
101
120
  ic_bin = Ic[rr[bin_idxes,:]]
102
- val = numpy.isfinite(iobs) & numpy.isfinite(ic_bin).all(axis=1)
121
+ val = numpy.isfinite(iobs) & numpy.isfinite(ic_bin).all(axis=1) & numpy.all(rr[bin_idxes,:]>=0, axis=1)
103
122
  iobs, ic_bin = iobs[val], ic_bin[val,:]
104
- cc_o_c = [numpy.corrcoef(iobs, ic_bin[:,i])[0,1] for i in range(len(twin_data.ops)+1)]
105
- frac_est = numpy.dot(numpy.linalg.pinv(P), cc_o_c)
106
- tmp.append(frac_est.tolist())
107
-
108
- df = pandas.DataFrame(tmp)
109
- df.iloc[:,:] /= df.sum(axis=1).to_numpy()[:,None]
110
- mean_alphas = numpy.maximum(0, df.mean())
111
- mean_alphas /= numpy.sum(mean_alphas)
112
- logger.write(" Estimated fractions from data-model correlations: ")
113
- logger.writeln(" ".join("%.2f"%x for x in mean_alphas))
114
- twin_data.alphas = mean_alphas
123
+ cc_oc = [numpy.corrcoef(iobs, ic_bin[:,i])[0,1] for i in range(n_ops)]
124
+ P_list.append(P)
125
+ cc_oc_list.append(cc_oc)
126
+ weight_list.append(numpy.sum(val))
127
+ frac_est = numpy.dot(numpy.linalg.pinv(P), cc_oc)
128
+ frac_est /= frac_est.sum()
129
+ tmp.append(P[tidxes].tolist() + cc_oc + [weight_list[-1]] + frac_est.tolist())
115
130
 
131
+ P = numpy.average(P_list, axis=0, weights=weight_list)
132
+ cc_oc = numpy.average(cc_oc_list, axis=0, weights=weight_list)
133
+ frac_est = numpy.dot(numpy.linalg.pinv(P), cc_oc)
134
+ frac_est = numpy.maximum(0, frac_est)
135
+ frac_est /= frac_est.sum()
136
+ df = pandas.DataFrame(tmp, columns=[f"cc_{i+1}_{j+1}" for i, j in zip(*tidxes)] +
137
+ [f"cc_o_{i+1}" for i in range(n_ops)] +
138
+ ["nref"] + [f"raw_est_{i+1}" for i in range(n_ops)])
139
+ with logger.with_prefix(" "):
140
+ logger.writeln(df.to_string(float_format="%.4f"))
141
+ logger.write(" Final twin fraction estimate: ")
142
+ logger.writeln(" ".join("%.2f"%x for x in frac_est))
143
+ twin_data.alphas = frac_est
144
+ return df
@@ -1,16 +1,16 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: servalcat
3
- Version: 0.4.88
3
+ Version: 0.4.100
4
4
  Summary: Structure refinement and validation for crystallography and single particle analysis
5
5
  Author: Keitaro Yamashita, Garib N. Murshudov
6
6
  License: MPL-2.0
7
7
  Project-URL: Repository, https://github.com/keitaroyam/servalcat
8
- Requires-Python: >=3.7
8
+ Requires-Python: >=3.8
9
9
  Requires-Dist: packaging
10
10
  Requires-Dist: numpy>=1.15
11
11
  Requires-Dist: scipy
12
12
  Requires-Dist: pandas>=1.1.0
13
- Requires-Dist: gemmi==0.6.7
13
+ Requires-Dist: gemmi==0.7.0
14
14
  Description-Content-Type: text/markdown
15
15
 
16
16
  # Servalcat
@@ -40,7 +40,7 @@ pip install servalcat
40
40
  ```
41
41
  will install the stable version.
42
42
 
43
- The required GEMMI version is now [v0.6.7](https://github.com/project-gemmi/gemmi/releases/tag/v0.6.7). It may not work with the latest gemmi code from the github. The policy is in the main branch I only push the code that works with the latest package of GEMMI.
43
+ The required GEMMI version is now [v0.7.0](https://github.com/project-gemmi/gemmi/releases/tag/v0.7.0). Servalcat may not work with the latest GEMMI code from GitHub. My policy is to push to the main branch only code that works with the latest released GEMMI package.
44
44
 
45
45
  To use the Refmac5 related commands, you also need to install [CCP4](https://www.ccp4.ac.uk/). For "No Refmac5" commands, you may just need [the monomer library](https://github.com/MonomerLibrary/monomers) if CCP4 is not installed.
46
46
 
@@ -0,0 +1,45 @@
1
+ servalcat-0.4.100.dist-info/RECORD,,
2
+ servalcat-0.4.100.dist-info/WHEEL,sha256=PJzLOGr1UQbVJ9ClJnwqiFhnlyWYjSsyNgebnLgIQKg,114
3
+ servalcat-0.4.100.dist-info/entry_points.txt,sha256=G1mDxhOCdF3umYz4k0kfwJbSdYSKqhvQdGCmrP8FRAY,111
4
+ servalcat-0.4.100.dist-info/METADATA,sha256=fY8VFfVql5PFlkJdbmAFaPOH9WVm8MJ7oKedUyyL9ys,2742
5
+ servalcat-0.4.100.dist-info/licenses/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725
6
+ servalcat/__init__.py,sha256=QKT0vTBwtQtat3LpKrUkOQ5o2aA_FjjhNjTdQPhyes8,232
7
+ servalcat/__main__.py,sha256=bNEZKrG5765uOp32UiBAJmeJi3O0sk4I5QjrXUbngIE,4070
8
+ servalcat/ext.cpython-313-darwin.so,sha256=RD8t3hM02Jf50Km9p97-thsCHwrICTQMNcOQ9sMcz18,1219592
9
+ servalcat/refmac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ servalcat/refmac/refmac_keywords.py,sha256=yzYSJELMjdTnf4pT0_8EdIOjCDncqkjpPJ3VNcsFDrU,26649
11
+ servalcat/refmac/exte.py,sha256=HtmlTHRzCeCN-vSJZdKCD1GIhQO6zTe1OKLUObC3gZ8,9019
12
+ servalcat/refmac/refmac_wrapper.py,sha256=euuhBv2F_ArmUvlfAA9fld4oCDkWMZENP73ioTtv118,17838
13
+ servalcat/xtal/twin.py,sha256=FfmVIdwve7MCCFXu019MYKSeWX8_Vy0SrHjqh1_sxBs,6181
14
+ servalcat/xtal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ servalcat/xtal/sigmaa.py,sha256=ZkswqjUw-ISOjsRiwJC4FcsR_RlRvv6eYIzBbeKVTIc,75258
16
+ servalcat/xtal/french_wilson.py,sha256=GE3WEPhuizDVSPq1Afyb1O-OPTlzKDXYhkDCIELWI_0,11787
17
+ servalcat/xtal/run_refmac_small.py,sha256=Yg-bEPuOdQxCwdwY8StCvXabcWHrRl6A22gJgLsypeU,10394
18
+ servalcat/utils/symmetry.py,sha256=ahMiaEDMyfV2YW0BkikF_ldMtIN4-usAGsNRCXGVbug,11869
19
+ servalcat/utils/generate_operators.py,sha256=fCrcTcciRn-6fpFtHYvf1oPXAObjuqR3WQejz5d0RrA,10980
20
+ servalcat/utils/maps.py,sha256=0hen0Pt9IxbGQOLZyZaGiNWBAttc6aMqb6NzrCKkZMQ,13225
21
+ servalcat/utils/__init__.py,sha256=j-fMAqHvzyMMQXb2Sf7Urnk4oQ31pT-g_NXf3K-NM4c,1137
22
+ servalcat/utils/logger.py,sha256=eglkG7RxCJl4Q5P0aQGp4SVauoWh36nQgFvSJJLpyrM,4587
23
+ servalcat/utils/model.py,sha256=78flu7sl-MmlN_ItY4YSWxyvTxuSsRMgBPotzh9MHqc,30556
24
+ servalcat/utils/restraints.py,sha256=V2QBfgSH636upbLtGany4QyBk3r05CLxqvJYiM1SLSQ,36960
25
+ servalcat/utils/refmac.py,sha256=LKwgCtLg7R5D8mjI7yCGd2lrjcjTeEJVAo1pey2u3IU,30922
26
+ servalcat/utils/commands.py,sha256=dxsbPMZl-1BVKKr0rXzGF08k-Aco5qwzEHRWxZlcxek,70956
27
+ servalcat/utils/hkl.py,sha256=dPwHxCRx33j_Egd9lKRWlUbk5MecziSiqW6vJz_eeUk,29787
28
+ servalcat/utils/fileio.py,sha256=YytwlL0SeaxHCgIzFEjfS_tRpLDmvhVDjwEnkDXw1_E,29502
29
+ servalcat/spa/shiftback.py,sha256=A9OfaZ8r2BC6A2uGXxNNhmyBNORUB_MeqJC29ZRebnw,4664
30
+ servalcat/spa/localcc.py,sha256=0otKfGKH33cFsD6Qvh8iL1mINlWqb3gqAbVKqy8fQPY,7985
31
+ servalcat/spa/translate.py,sha256=BwmVcyDbqDehjtH7kA4wQFF4hJb9c1gcHu7JbGREUTU,5099
32
+ servalcat/spa/run_refmac.py,sha256=tX0a37-aTJgADZ_NCdKbT8aE4P9Bnmm1qrFW2jc9Hw4,49550
33
+ servalcat/spa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ servalcat/spa/fsc.py,sha256=ISsBOl5JJZA6yT2yuDHNQ4I780TrmuYW6CPvimCwDqo,17454
35
+ servalcat/spa/realspcc_from_var.py,sha256=VpdvNEY4zRza08Vz727JI6Seq-ix-oujx4Gvzy_VkYI,5275
36
+ servalcat/spa/shift_maps.py,sha256=iP1w4LNXETvZblElJ64AFMcE-lPrhx1s4SfacGktg4o,13268
37
+ servalcat/spa/fofc.py,sha256=Q97XkJX_4KWmxCIzq16R5BTMUY28V_4CPdbzF6p5Oxo,23113
38
+ servalcat/refine/xtal.py,sha256=F_inGxiUJ1FabeJe2IMZiqJbJRuDrvJedYAoCwWnDSY,14635
39
+ servalcat/refine/refine.py,sha256=W3oduNRNgc05mqMO1wYUdgN3pRrUuR9kEJ3yhAJd9jQ,44035
40
+ servalcat/refine/refine_spa.py,sha256=nTNw2c1HU6sCbdfjR7gPyWiKDpvhep8wNnL4d5Mfjnc,19318
41
+ servalcat/refine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ servalcat/refine/spa.py,sha256=lmh4P6nQNFIdaZUKFIc_ES6B6X-FG-JkxyfpqHgtk9g,6472
43
+ servalcat/refine/cgsolve.py,sha256=tHuWXr76x4sRAMUQ4dGVJzyHF_n1LiwKHTjAwh8iFMg,3072
44
+ servalcat/refine/refine_geom.py,sha256=uATaTCEVkPJw3DjpVX66j7mhYs9Mp21rDwBfumrpwfw,11352
45
+ servalcat/refine/refine_xtal.py,sha256=dxJsS73frNbjgQOwiMopv4ijed23-AXxmhZv5v0-kGk,14564
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: scikit-build-core 0.10.6
2
+ Generator: scikit-build-core 0.10.7
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp313-cp313-macosx_11_0_arm64
5
5
 
@@ -1,45 +0,0 @@
1
- servalcat-0.4.88.dist-info/RECORD,,
2
- servalcat-0.4.88.dist-info/WHEEL,sha256=ULmZMDTYkGtY4RRvuVnUViaZ4lRrvEqSp2J4UKfolWA,114
3
- servalcat-0.4.88.dist-info/entry_points.txt,sha256=G1mDxhOCdF3umYz4k0kfwJbSdYSKqhvQdGCmrP8FRAY,111
4
- servalcat-0.4.88.dist-info/METADATA,sha256=B0VVGu-KUJy9euhZEeDrZ_mIlwvMBeKh3e8Pz8zm_ww,2741
5
- servalcat-0.4.88.dist-info/licenses/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725
6
- servalcat/__init__.py,sha256=6hLXc7c2AoM0dJqaoKHAGlmleut4b6M7aAbDE0byRG4,231
7
- servalcat/__main__.py,sha256=bNEZKrG5765uOp32UiBAJmeJi3O0sk4I5QjrXUbngIE,4070
8
- servalcat/ext.cpython-313-darwin.so,sha256=Sq-7TQ8Bw8lu2KTlT5XfefOtoS6mpUI1Eh0TwIbGfL0,2131880
9
- servalcat/refmac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- servalcat/refmac/refmac_keywords.py,sha256=yzYSJELMjdTnf4pT0_8EdIOjCDncqkjpPJ3VNcsFDrU,26649
11
- servalcat/refmac/exte.py,sha256=HtmlTHRzCeCN-vSJZdKCD1GIhQO6zTe1OKLUObC3gZ8,9019
12
- servalcat/refmac/refmac_wrapper.py,sha256=iZvHZRpFKrdxx0ggT9AFUmjnuBXyfZBdJ69ZnN0_zkQ,18130
13
- servalcat/xtal/twin.py,sha256=Q1lbYm17w75KohvOS5blyp8TEGviWJ9zJlmmy8zSnU8,4741
14
- servalcat/xtal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- servalcat/xtal/sigmaa.py,sha256=LvRu76y0AqrT_n7LpDQnqcKm4NDkslroCkWowmIs5lk,74185
16
- servalcat/xtal/french_wilson.py,sha256=pL97K22WIkHHy_IexGPFhCan2WRHHemdWO0x7glTT7s,11656
17
- servalcat/xtal/run_refmac_small.py,sha256=Yg-bEPuOdQxCwdwY8StCvXabcWHrRl6A22gJgLsypeU,10394
18
- servalcat/utils/symmetry.py,sha256=H-5RBiQOp0MlWJE8LGJWvqe-YzmtzzrXfLO_2E_OpZU,11937
19
- servalcat/utils/generate_operators.py,sha256=fCrcTcciRn-6fpFtHYvf1oPXAObjuqR3WQejz5d0RrA,10980
20
- servalcat/utils/maps.py,sha256=ZoB3wwZZTLHYuwC4HBGFAYX4zmkZ8CdxD9wSLftL48c,13217
21
- servalcat/utils/__init__.py,sha256=j-fMAqHvzyMMQXb2Sf7Urnk4oQ31pT-g_NXf3K-NM4c,1137
22
- servalcat/utils/logger.py,sha256=oQcoKcM81fSsoQt_dKgwfUekJN5CcLMbti_kLb2loXM,3828
23
- servalcat/utils/model.py,sha256=_XpYd1WKtI1ErCXzNCxIukYClhJWl8BcFUfTgu-NB4E,30272
24
- servalcat/utils/restraints.py,sha256=IookRqipmUQlEgYRbt9lhzSuRHPuIq0QputfN87cEms,36911
25
- servalcat/utils/refmac.py,sha256=SFIReNexBpBBMgBUX67V_63NY8Yy8mOmAn9rYBPiDIo,30921
26
- servalcat/utils/commands.py,sha256=zkSFmdXIthC4ogvxgiHsVUyhBLZaiwflJAwP0CCBnZ8,64037
27
- servalcat/utils/hkl.py,sha256=3sw5NVJTKAp-7KzZmKsgGCpeRLKih4Wdf1kPK1JR4fM,28170
28
- servalcat/utils/fileio.py,sha256=NROzr_mpgc5qbvhaoEsIOHEFnN0uthabqKUiINE8Xnw,29085
29
- servalcat/spa/shiftback.py,sha256=A9OfaZ8r2BC6A2uGXxNNhmyBNORUB_MeqJC29ZRebnw,4664
30
- servalcat/spa/localcc.py,sha256=0otKfGKH33cFsD6Qvh8iL1mINlWqb3gqAbVKqy8fQPY,7985
31
- servalcat/spa/translate.py,sha256=InePz9d38KaUK4wr8uk06_6v4y80T3HYcURWp8c21zM,5091
32
- servalcat/spa/run_refmac.py,sha256=KWl7Cc6MLlsJdEsJ6MXBP8_2FiWeDZ66xmk04Ka-cVY,49196
33
- servalcat/spa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- servalcat/spa/fsc.py,sha256=lRkNRRBSIcVlnoF5mvCKNXTxasp73a7SCLth4hTnWzE,17419
35
- servalcat/spa/realspcc_from_var.py,sha256=VpdvNEY4zRza08Vz727JI6Seq-ix-oujx4Gvzy_VkYI,5275
36
- servalcat/spa/shift_maps.py,sha256=iP1w4LNXETvZblElJ64AFMcE-lPrhx1s4SfacGktg4o,13268
37
- servalcat/spa/fofc.py,sha256=7ZG7wXtbcaeHG9dVZvYofNb75cbqthhE7O_XTVx-y70,22762
38
- servalcat/refine/xtal.py,sha256=YRZusmJXcMp1HtrKZiUd6JF-igS6MbHvOIuyFSPOVv0,14384
39
- servalcat/refine/refine.py,sha256=4ly66LSUNHbT6hqRnp-J-0G0CXOwLu_UKHW0LzT9CUw,38290
40
- servalcat/refine/refine_spa.py,sha256=J6LS8efSQ6sHEp5VH4It8TT734CQ5SVknP5AiQW9p0U,17896
41
- servalcat/refine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- servalcat/refine/spa.py,sha256=P_IkA291ocl92SpZ8Uagcab_Usqj2JzW8cAw_aCPmpg,6238
43
- servalcat/refine/cgsolve.py,sha256=tHuWXr76x4sRAMUQ4dGVJzyHF_n1LiwKHTjAwh8iFMg,3072
44
- servalcat/refine/refine_geom.py,sha256=GNVyll5LVoEmAzzenikg-nKjJXCtJ8B8Gd13q5nMtCk,10757
45
- servalcat/refine/refine_xtal.py,sha256=tW4AC1O8esxls6MS6YMiya-R41-daIROGxZscLf2b4c,13843