servalcat 0.4.99__cp312-cp312-win_amd64.whl → 0.4.105__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +2 -2
- servalcat/ext.cp312-win_amd64.pyd +0 -0
- servalcat/refine/refine.py +11 -1
- servalcat/refine/refine_spa.py +10 -9
- servalcat/refine/refine_xtal.py +31 -26
- servalcat/refine/xtal.py +4 -4
- servalcat/refmac/refmac_wrapper.py +36 -2
- servalcat/spa/fofc.py +4 -4
- servalcat/spa/run_refmac.py +3 -3
- servalcat/utils/commands.py +4 -4
- servalcat/utils/fileio.py +78 -39
- servalcat/utils/hkl.py +39 -4
- servalcat/utils/logger.py +1 -1
- servalcat/utils/model.py +14 -6
- servalcat/utils/symmetry.py +5 -2
- servalcat/xtal/sigmaa.py +113 -51
- servalcat/xtal/twin.py +66 -44
- {servalcat-0.4.99.dist-info → servalcat-0.4.105.dist-info}/METADATA +5 -5
- {servalcat-0.4.99.dist-info → servalcat-0.4.105.dist-info}/RECORD +22 -22
- {servalcat-0.4.99.dist-info → servalcat-0.4.105.dist-info}/WHEEL +1 -1
- {servalcat-0.4.99.dist-info → servalcat-0.4.105.dist-info}/entry_points.txt +0 -0
- {servalcat-0.4.99.dist-info → servalcat-0.4.105.dist-info}/licenses/LICENSE +0 -0
servalcat/utils/hkl.py
CHANGED
|
@@ -493,6 +493,31 @@ class HklData:
|
|
|
493
493
|
self.df = self.df[~sel]
|
|
494
494
|
# remove_nonpositive()
|
|
495
495
|
|
|
496
|
+
def mask_invalid_obs_values(self, labels):
    """Set invalid observations in self.df to NaN, one value/sigma pair at a time.

    labels holds 2 to 5 column names of self.df: value, sigma[, value(-), sigma(-)]
    optionally followed by one more column (the pair columns are labels[:2]
    and, for four or more labels, labels[2:4]).

    For each pair:
      - if the first label starts with "F", rows where the value is <= 0 are masked;
      - rows where the sigma is <= 0 are masked;
      - rows where either member of the pair is non-finite get both members masked.
    Rows are never dropped; only the pair columns are overwritten with NaN.
    """
    assert 1 < len(labels) < 6
    assert labels[1].startswith("SIG")
    if len(labels) < 4: # F/SIGF or I/SIGI
        pairs = [labels[:2]]
    else: # I(+)/SIGI(+)/I(-)/SIGI(-) or F...
        assert labels[3].startswith("SIG")
        pairs = [labels[:2], labels[2:4]]

    # Each pass is (column tested for <=0, columns to mask).
    # Values are tested first (amplitudes only), then sigmas.
    passes = []
    if labels[0].startswith("F"):
        passes.extend((labels[0 if pair is pairs[0] else 2], pair) for pair in pairs) # bad F / F+ / F-
    passes.extend((labels[1 if pair is pairs[0] else 3], pair) for pair in pairs) # bad sigma

    for checked, pair in passes:
        nonpositive = self.df[checked] <= 0
        count = nonpositive.sum()
        if count > 0:
            logger.writeln("Removing {} reflections with {}<=0".format(count, checked))
            self.df.loc[nonpositive, pair] = numpy.nan
        # If any element within the pair is non-finite, mask all elements
        incomplete = (~numpy.isfinite(self.df[pair])).any(axis=1)
        self.df.loc[incomplete, pair] = numpy.nan
# mask_invalid_obs_values()
|
|
520
|
+
|
|
496
521
|
def remove_systematic_absences(self):
|
|
497
522
|
is_absent = self.sg.operations().systematic_absences(self.miller_array())
|
|
498
523
|
n_absent = numpy.sum(is_absent)
|
|
@@ -501,12 +526,22 @@ class HklData:
|
|
|
501
526
|
self.df = self.df[~is_absent]
|
|
502
527
|
# remove_systematic_absences()
|
|
503
528
|
|
|
504
|
-
def merge_anomalous(self, labs, newlabs):
|
|
529
|
+
def merge_anomalous(self, labs, newlabs, method="weighted"):
    """Merge a Friedel pair of observations into one value and one sigma.

    labs    -- four column names of self.df: value(+), sigma(+), value(-), sigma(-)
    newlabs -- two column names to write: merged value, merged sigma
    method  -- "weighted": inverse-variance weighted mean, sigma = sqrt(1 / sum(w))
               "simple":   arithmetic mean, sigma = sqrt(mean(sigma**2))

    A mate contributes only when both its value and its sigma are finite.
    Without this pairing, the weight (or sigma) of a mate whose observation
    is missing would still enter the denominator, biasing the merged value
    and its sigma. Rows with no usable mate get NaN in both new columns.
    """
    assert method in ("weighted", "simple")
    assert len(labs) == 4 # i+,sigi+,i-,sigi- for example
    assert len(newlabs) == 2
    values = self.df[[labs[0], labs[2]]]
    sigmas = self.df[[labs[1], labs[3]]]
    # Element-wise mask shared by both frames (their column names differ,
    # so a plain ndarray is used rather than a DataFrame condition).
    usable = numpy.isfinite(values.to_numpy()) & numpy.isfinite(sigmas.to_numpy())
    values = values.where(usable)
    sigmas = sigmas.where(usable)
    if method == "simple":
        # skipna=True is default, so missing value is handled nicely.
        self.df[newlabs[0]] = values.mean(axis=1)
        self.df[newlabs[1]] = sigmas.pow(2).mean(axis=1).pow(0.5)
    else:
        obs = values.to_numpy()
        weights = 1. / sigmas.to_numpy()**2
        sum_w = numpy.nansum(weights, axis=1)
        sum_w[sum_w == 0] = numpy.nan # mask when both are nan
        self.df[newlabs[0]] = numpy.nansum(obs * weights, axis=1) / sum_w
        self.df[newlabs[1]] = numpy.sqrt(1. / sum_w)
# merge_anomalous()
|
|
510
545
|
|
|
511
546
|
def as_asu_data(self, label=None, data=None, label_sigma=None):
|
|
512
547
|
if label is None: assert data is not None
|
servalcat/utils/logger.py
CHANGED
|
@@ -37,7 +37,7 @@ class Logger(object):
|
|
|
37
37
|
def write(self, l, end="", flush=True, fs=None, print_fs=sys.stdout):
|
|
38
38
|
if self.stopped: return
|
|
39
39
|
if self.prefix:
|
|
40
|
-
l = "
|
|
40
|
+
l = "".join(self.prefix + x for x in l.splitlines(keepends=True))
|
|
41
41
|
print(l, end=end, file=print_fs, flush=flush)
|
|
42
42
|
for f in (self.ofs, fs):
|
|
43
43
|
if f is not None:
|
servalcat/utils/model.py
CHANGED
|
@@ -73,11 +73,12 @@ def remove_charge(sts):
|
|
|
73
73
|
def check_atomsf(sts, source, mott_bethe=True):
|
|
74
74
|
assert source in ("xray", "electron", "neutron")
|
|
75
75
|
if source != "electron": mott_bethe = False
|
|
76
|
-
logger.writeln("Atomic scattering factors for {}".format("
|
|
76
|
+
logger.writeln("Atomic scattering factors for {}".format("xray (use Mott-Bethe to convert to electrons)" if mott_bethe else source))
|
|
77
77
|
if source != "xray" and not mott_bethe:
|
|
78
78
|
logger.writeln(" Note that charges will be ignored")
|
|
79
79
|
el_charges = {(cra.atom.element, cra.atom.charge) for st in sts for cra in st[0].all()}
|
|
80
80
|
elems = {x[0] for x in el_charges}
|
|
81
|
+
tmp = {}
|
|
81
82
|
if source == "xray" or mott_bethe:
|
|
82
83
|
shown = set()
|
|
83
84
|
for el, charge in sorted(el_charges, key=lambda x: (x[0].atomic_number, x[1])):
|
|
@@ -88,12 +89,16 @@ def check_atomsf(sts, source, mott_bethe=True):
|
|
|
88
89
|
charge = 0
|
|
89
90
|
if (el, charge) in shown: continue
|
|
90
91
|
label = el.name if charge == 0 else "{}{:+}".format(el.name, charge)
|
|
91
|
-
logger.writeln(" {} {}".format(label, tuple(sf.get_coefs())))
|
|
92
92
|
shown.add((el, charge))
|
|
93
|
+
tmp[label] = {**{f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(sf, k))}, "c": sf.c}
|
|
93
94
|
else:
|
|
94
95
|
for el in sorted(elems, key=lambda x: x.atomic_number):
|
|
95
|
-
|
|
96
|
-
|
|
96
|
+
if source == "electron":
|
|
97
|
+
tmp[el.name] = {f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(el.c4322, k))}
|
|
98
|
+
else:
|
|
99
|
+
tmp[el.name] = {"a": el.neutron92.get_coefs()[0]}
|
|
100
|
+
with logger.with_prefix(" "):
|
|
101
|
+
logger.writeln(pandas.DataFrame(tmp).T.to_string())
|
|
97
102
|
logger.writeln("")
|
|
98
103
|
# check_atomsf()
|
|
99
104
|
|
|
@@ -489,9 +494,10 @@ def filter_contacting_ncs(st, cutoff=5.):
|
|
|
489
494
|
st.setup_cell_images()
|
|
490
495
|
ns = gemmi.NeighborSearch(st[0], st.cell, cutoff*2).populate() # This is considered crystallographic cell if not 1 1 1. Undesirable result may be seen.
|
|
491
496
|
cs = gemmi.ContactSearch(cutoff)
|
|
497
|
+
cs.twice = True # since we need all image_idx
|
|
492
498
|
cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
|
|
493
499
|
results = cs.find_contacts(ns)
|
|
494
|
-
indices =
|
|
500
|
+
indices = {r.image_idx for r in results}
|
|
495
501
|
logger.writeln(" contacting copies: {}".format(indices))
|
|
496
502
|
ops = [st.ncs[i-1] for i in indices] # XXX is this correct? maybe yes as long as identity operator is not there
|
|
497
503
|
st.ncs.clear()
|
|
@@ -553,7 +559,9 @@ def reset_adp(model, bfactor=None, adp_mode="iso"):
|
|
|
553
559
|
if adp_mode == "iso" or (adp_mode == "fix" and bfactor is not None):
|
|
554
560
|
cra.atom.aniso = gemmi.SMat33f(0,0,0,0,0,0)
|
|
555
561
|
elif adp_mode == "aniso":
|
|
556
|
-
if
|
|
562
|
+
if cra.atom.aniso.nonzero() and bfactor is None: # just in case
|
|
563
|
+
cra.atom.b_iso = numpy.mean(cra.atom.aniso.calculate_eigenvalues()) * u_to_b
|
|
564
|
+
else:
|
|
557
565
|
u = cra.atom.b_iso * b_to_u
|
|
558
566
|
cra.atom.aniso = gemmi.SMat33f(u, u, u, 0, 0, 0)
|
|
559
567
|
# reset_adp()
|
servalcat/utils/symmetry.py
CHANGED
|
@@ -39,8 +39,11 @@ def update_ncs_from_args(args, st, map_and_start=None, filter_contacting=False,
|
|
|
39
39
|
ncsops = ncsops_from_args(args, st.cell, map_and_start=map_and_start, st=st,
|
|
40
40
|
helical_min_n=helical_min_n, helical_max_n=helical_max_n)
|
|
41
41
|
|
|
42
|
-
st.ncs.
|
|
43
|
-
|
|
42
|
+
st.ncs = [x for x in ncsops if not x.tr.is_identity()]
|
|
43
|
+
# To write identity op to the output model
|
|
44
|
+
idop_id = next((x.id for x in ncsops if x.tr.is_identity()), None)
|
|
45
|
+
if idop_id:
|
|
46
|
+
st.info["_struct_ncs_oper.id"] = idop_id
|
|
44
47
|
|
|
45
48
|
if filter_contacting:
|
|
46
49
|
model.filter_contacting_ncs(st)
|
servalcat/xtal/sigmaa.py
CHANGED
|
@@ -30,10 +30,14 @@ def add_arguments(parser):
|
|
|
30
30
|
parser.description = 'Sigma-A parameter estimation for crystallographic data'
|
|
31
31
|
parser.add_argument('--hklin', required=True,
|
|
32
32
|
help='Input MTZ file')
|
|
33
|
+
parser.add_argument('--hklin_free',
|
|
34
|
+
help='Input MTZ file for test flags')
|
|
33
35
|
parser.add_argument('--spacegroup',
|
|
34
36
|
help='Override space group')
|
|
35
37
|
parser.add_argument('--labin',
|
|
36
|
-
help='MTZ
|
|
38
|
+
help='MTZ columns of --hklin for F,SIGF,FREE')
|
|
39
|
+
parser.add_argument('--labin_free',
|
|
40
|
+
help='MTZ column of --hklin_free')
|
|
37
41
|
parser.add_argument('--free', type=int,
|
|
38
42
|
help='flag number for test set')
|
|
39
43
|
parser.add_argument('--model', required=True, nargs="+", action="append",
|
|
@@ -97,6 +101,8 @@ def calc_r_and_cc(hkldata, centric_and_selections, twin_data=None):
|
|
|
97
101
|
else:
|
|
98
102
|
obs = obs_sqrt = hkldata.df.FP
|
|
99
103
|
calc = calc_sqrt = Fc
|
|
104
|
+
if "CC*" in stats: # swap the positions
|
|
105
|
+
stats.insert(len(stats.columns)-1, "CC*", stats.pop("CC*"))
|
|
100
106
|
if has_free:
|
|
101
107
|
for lab in (cclab, rlab):
|
|
102
108
|
for suf in ("work", "free"):
|
|
@@ -1037,6 +1043,7 @@ def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections
|
|
|
1037
1043
|
nmodels = len(fc_labs)
|
|
1038
1044
|
hkldata.df["FWT"] = 0j * numpy.nan
|
|
1039
1045
|
hkldata.df["DELFWT"] = 0j * numpy.nan
|
|
1046
|
+
hkldata.df["F_est"] = numpy.nan
|
|
1040
1047
|
hkldata.df["FOM"] = numpy.nan # FOM proxy, |<F>| / <|F|>
|
|
1041
1048
|
has_ano = "I(+)" in hkldata.df and "I(-)" in hkldata.df
|
|
1042
1049
|
if has_ano:
|
|
@@ -1056,12 +1063,10 @@ def calculate_maps_int(hkldata, b_aniso, fc_labs, D_labs, centric_and_selections
|
|
|
1056
1063
|
S = hkldata.df["S"].to_numpy()[cidxes]
|
|
1057
1064
|
f, m_proxy = expected_F_from_int(Io[cidxes], sigIo[cidxes], k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
|
|
1058
1065
|
exp_ip = numpy.exp(numpy.angle(DFc[cidxes])*1j)
|
|
1059
|
-
|
|
1060
|
-
hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
|
|
1061
|
-
else:
|
|
1062
|
-
hkldata.df.loc[cidxes, "FWT"] = f * exp_ip
|
|
1066
|
+
hkldata.df.loc[cidxes, "FWT"] = 2 * f * exp_ip - DFc[cidxes]
|
|
1063
1067
|
hkldata.df.loc[cidxes, "DELFWT"] = f * exp_ip - DFc[cidxes]
|
|
1064
1068
|
hkldata.df.loc[cidxes, "FOM"] = m_proxy
|
|
1069
|
+
hkldata.df.loc[cidxes, "F_est"] = f
|
|
1065
1070
|
if has_ano:
|
|
1066
1071
|
f_p, _ = expected_F_from_int(ano_data[cidxes,0], ano_data[cidxes,1],
|
|
1067
1072
|
k_ani[cidxes], DFc[cidxes], eps[cidxes], c, S)
|
|
@@ -1162,12 +1167,34 @@ def decide_mtz_labels(mtz, find_free=True, require=None):
|
|
|
1162
1167
|
return labin
|
|
1163
1168
|
# decide_mtz_labels()
|
|
1164
1169
|
|
|
1170
|
+
def decide_spacegroup(sg_user, sg_st, sg_hkl):
    """Pick the space group to use from the user, model and reflection-data candidates.

    sg_user -- space group requested by the user, or None
    sg_st   -- space group of the model; may be None
    sg_hkl  -- space group of the reflection data; must not be None

    Returns sg_user when given. Otherwise returns sg_hkl, unless the model
    space group differs from it while sharing its Laue class, in which case
    the model space group is returned. Raises RuntimeError when the model
    space group is set and its Laue class differs from that of the data.
    """
    assert sg_hkl is not None
    if sg_user is not None:
        logger.writeln(f"Space group overridden by user. Using {sg_user.xhm()}")
        return sg_user

    chosen = sg_hkl
    if sg_hkl != sg_st:
        if sg_st:
            # A model space group is only acceptable within the same Laue class
            if sg_st.laue_str() != sg_hkl.laue_str():
                raise RuntimeError("Crystal symmetry mismatch between model and data")
            chosen = sg_st
            origin_msg = " using space group from model"
        else:
            origin_msg = " using space group from mtz"
        logger.writeln("Warning: space group mismatch between model and mtz")
        logger.writeln(origin_msg)
        logger.writeln("")

    return chosen
# decide_spacegroup
|
|
1191
|
+
|
|
1165
1192
|
def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=None,
|
|
1166
1193
|
n_per_bin=None, use="all", max_bins=None, cif_index=0, keep_charges=False,
|
|
1167
|
-
allow_unusual_occupancies=False, space_group=None
|
|
1194
|
+
allow_unusual_occupancies=False, space_group=None,
|
|
1195
|
+
hklin_free=None, labin_free=None):
|
|
1168
1196
|
if labin: assert 1 < len(labin) < 6
|
|
1169
1197
|
assert use in ("all", "work", "test")
|
|
1170
|
-
assert n_bins or n_per_bin #if n_bins not set, n_per_bin should be given
|
|
1171
1198
|
|
|
1172
1199
|
if len(xyzins) > 0 and type(xyzins[0]) is gemmi.Structure:
|
|
1173
1200
|
sts = xyzins
|
|
@@ -1185,13 +1212,18 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1185
1212
|
assert len(xyzins) == 1
|
|
1186
1213
|
assert not sts
|
|
1187
1214
|
st, mtz = utils.fileio.read_small_molecule_files([hklin, xyzins[0]])
|
|
1215
|
+
if None in (st, mtz):
|
|
1216
|
+
raise SystemExit("Failed to read small molecule file(s)")
|
|
1188
1217
|
sts = [st]
|
|
1189
1218
|
|
|
1190
1219
|
for st in sts:
|
|
1191
1220
|
utils.model.check_occupancies(st, raise_error=not allow_unusual_occupancies)
|
|
1192
1221
|
|
|
1222
|
+
sg_use = decide_spacegroup(sg_user=gemmi.SpaceGroup(space_group) if space_group else None,
|
|
1223
|
+
sg_st=sts[0].find_spacegroup() if sts else None,
|
|
1224
|
+
sg_hkl=mtz.spacegroup)
|
|
1193
1225
|
if not labin:
|
|
1194
|
-
labin = decide_mtz_labels(mtz)
|
|
1226
|
+
labin = decide_mtz_labels(mtz, find_free=hklin_free is None)
|
|
1195
1227
|
col_types = {x.label:x.type for x in mtz.columns}
|
|
1196
1228
|
if labin[0] not in col_types:
|
|
1197
1229
|
raise RuntimeError("MTZ column not found: {}".format(labin[0]))
|
|
@@ -1201,10 +1233,31 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1201
1233
|
"K": ("anomalous intensity", ["I(+)","SIGI(+)", "I(-)", "SIGI(-)"], ["K", "M", "K", "M"])}
|
|
1202
1234
|
if col_types[labin[0]] not in labs_and_types:
|
|
1203
1235
|
raise RuntimeError("MTZ column {} is neither amplitude nor intensity".format(labin[0]))
|
|
1236
|
+
if col_types[labin[0]] == "J": # may be unmerged data
|
|
1237
|
+
ints = gemmi.Intensities()
|
|
1238
|
+
ints.set_data(mtz.cell, sg_use, mtz.make_miller_array(),
|
|
1239
|
+
mtz.array[:,mtz.column_labels().index(labin[0])],
|
|
1240
|
+
mtz.array[:,mtz.column_labels().index(labin[1])])
|
|
1241
|
+
dtype = ints.prepare_for_merging(gemmi.DataType.Mean) # do we want Anomalous?
|
|
1242
|
+
ints_bak = ints.clone() # for stats
|
|
1243
|
+
ints.merge_in_place(dtype)
|
|
1244
|
+
if (ints.nobs_array > 1).any():
|
|
1245
|
+
mtz = ints.prepare_merged_mtz(with_nobs=False)
|
|
1246
|
+
labin = mtz.column_labels()[3:]
|
|
1247
|
+
col_types = {x.label:x.type for x in mtz.columns}
|
|
1248
|
+
mult = ints.nobs_array.mean()
|
|
1249
|
+
logger.writeln(f"Input data were merged (multiplicity: {mult:.2f}). Overriding labin={','.join(labin)}")
|
|
1250
|
+
else:
|
|
1251
|
+
ints_bak = None
|
|
1252
|
+
else:
|
|
1253
|
+
ints_bak = None
|
|
1254
|
+
|
|
1204
1255
|
name, newlabels, require_types = labs_and_types[col_types[labin[0]]]
|
|
1205
1256
|
logger.writeln("Observation type: {}".format(name))
|
|
1206
1257
|
if len(newlabels) < len(labin): newlabels.append("FREE")
|
|
1207
1258
|
hkldata = utils.hkl.hkldata_from_mtz(mtz, labin, newlabels=newlabels, require_types=require_types)
|
|
1259
|
+
hkldata.sg = sg_use
|
|
1260
|
+
hkldata.mask_invalid_obs_values(newlabels)
|
|
1208
1261
|
if newlabels[0] == "F(+)":
|
|
1209
1262
|
hkldata.merge_anomalous(newlabels[:4], ["FP", "SIGFP"])
|
|
1210
1263
|
newlabels = ["FP", "SIGFP"] + newlabels[4:]
|
|
@@ -1214,13 +1267,7 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1214
1267
|
|
|
1215
1268
|
if hkldata.df.empty:
|
|
1216
1269
|
raise RuntimeError("No data in hkl data")
|
|
1217
|
-
|
|
1218
|
-
if space_group is None:
|
|
1219
|
-
sg_use = None
|
|
1220
|
-
else:
|
|
1221
|
-
sg_use = gemmi.SpaceGroup(space_group)
|
|
1222
|
-
logger.writeln(f"Space group overridden by user. Using {sg_use.xhm()}")
|
|
1223
|
-
|
|
1270
|
+
|
|
1224
1271
|
if sts:
|
|
1225
1272
|
assert source in ["electron", "xray", "neutron"]
|
|
1226
1273
|
for st in sts:
|
|
@@ -1230,23 +1277,8 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1230
1277
|
logger.writeln("Warning: unit cell mismatch between model and reflection data")
|
|
1231
1278
|
logger.writeln(" using unit cell from mtz")
|
|
1232
1279
|
|
|
1233
|
-
for st in sts: st.cell = hkldata.cell # mtz cell is used in any case
|
|
1234
|
-
|
|
1235
|
-
sg_st = sts[0].find_spacegroup() # may be None
|
|
1236
|
-
if sg_use is None:
|
|
1237
|
-
sg_use = hkldata.sg
|
|
1238
|
-
if hkldata.sg != sg_st:
|
|
1239
|
-
if st.cell.is_crystal() and sg_st and sg_st.laue_str() != hkldata.sg.laue_str():
|
|
1240
|
-
raise RuntimeError("Crystal symmetry mismatch between model and data")
|
|
1241
|
-
logger.writeln("Warning: space group mismatch between model and mtz")
|
|
1242
|
-
if sg_st and sg_st.laue_str() == hkldata.sg.laue_str():
|
|
1243
|
-
logger.writeln(" using space group from model")
|
|
1244
|
-
sg_use = sg_st
|
|
1245
|
-
else:
|
|
1246
|
-
logger.writeln(" using space group from mtz")
|
|
1247
|
-
logger.writeln("")
|
|
1248
|
-
|
|
1249
1280
|
for st in sts:
|
|
1281
|
+
st.cell = hkldata.cell # mtz cell is used in any case
|
|
1250
1282
|
st.spacegroup_hm = sg_use.xhm()
|
|
1251
1283
|
st.setup_cell_images()
|
|
1252
1284
|
|
|
@@ -1254,22 +1286,36 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1254
1286
|
utils.model.remove_charge(sts)
|
|
1255
1287
|
utils.model.check_atomsf(sts, source)
|
|
1256
1288
|
|
|
1257
|
-
if sg_use is not None:
|
|
1258
|
-
hkldata.sg = sg_use
|
|
1259
|
-
if newlabels[0] == "FP":
|
|
1260
|
-
hkldata.remove_nonpositive(newlabels[0])
|
|
1261
|
-
hkldata.remove_nonpositive(newlabels[1])
|
|
1262
1289
|
hkldata.switch_to_asu()
|
|
1263
1290
|
hkldata.remove_systematic_absences()
|
|
1264
1291
|
#hkldata.df = hkldata.df.astype({name: 'float64' for name in ["I","SIGI","FP","SIGFP"] if name in hkldata.df})
|
|
1265
|
-
|
|
1266
|
-
if d_min is None and hkldata.d_min_max()[0] !=
|
|
1267
|
-
d_min =
|
|
1292
|
+
d_min_max_data = hkldata.d_min_max(newlabels)
|
|
1293
|
+
if d_min is None and hkldata.d_min_max()[0] != d_min_max_data[0]:
|
|
1294
|
+
d_min = d_min_max_data[0]
|
|
1268
1295
|
logger.writeln(f"Changing resolution to {d_min:.3f} A")
|
|
1269
1296
|
if (d_min, d_max).count(None) != 2:
|
|
1270
1297
|
hkldata = hkldata.copy(d_min=d_min, d_max=d_max)
|
|
1271
1298
|
if hkldata.df.empty:
|
|
1272
1299
|
raise RuntimeError("No data left in hkl data")
|
|
1300
|
+
|
|
1301
|
+
if hklin_free is not None:
|
|
1302
|
+
mtz2 = utils.fileio.read_mmhkl(hklin_free)
|
|
1303
|
+
if labin_free and labin_free not in mtz2.column_labels():
|
|
1304
|
+
raise RuntimeError(f"specified label ({labin_free}) not found in {hklin_free}")
|
|
1305
|
+
if not labin_free:
|
|
1306
|
+
tmp = utils.hkl.mtz_find_free_columns(mtz2)
|
|
1307
|
+
if tmp:
|
|
1308
|
+
labin_free = tmp[0]
|
|
1309
|
+
else:
|
|
1310
|
+
raise RuntimeError(f"Test flag label not found in {hklin_free}")
|
|
1311
|
+
tmp = utils.hkl.hkldata_from_mtz(mtz2, [labin_free], newlabels=["FREE"])
|
|
1312
|
+
tmp.sg = sg_use
|
|
1313
|
+
tmp.switch_to_asu()
|
|
1314
|
+
tmp.remove_systematic_absences()
|
|
1315
|
+
tmp = tmp.copy(d_min=d_min_max_data[0], d_max=d_min_max_data[1])
|
|
1316
|
+
hkldata.complete()
|
|
1317
|
+
tmp.complete()
|
|
1318
|
+
hkldata.merge(tmp.df[["H","K","L","FREE"]])
|
|
1273
1319
|
|
|
1274
1320
|
hkldata.complete()
|
|
1275
1321
|
hkldata.sort_by_resolution()
|
|
@@ -1277,9 +1323,20 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1277
1323
|
hkldata.calc_centric()
|
|
1278
1324
|
|
|
1279
1325
|
if "FREE" in hkldata.df and free is None:
|
|
1280
|
-
free = hkldata.guess_free_number(newlabels[0])
|
|
1326
|
+
free = hkldata.guess_free_number(newlabels[0]) # also check NaN
|
|
1281
1327
|
|
|
1282
1328
|
if n_bins is None:
|
|
1329
|
+
if n_per_bin is None:
|
|
1330
|
+
if use == "all" or "FREE" not in hkldata.df:
|
|
1331
|
+
n_per_bin = 100
|
|
1332
|
+
use = "all"
|
|
1333
|
+
elif use == "work":
|
|
1334
|
+
n_per_bin = 100
|
|
1335
|
+
elif use == "test":
|
|
1336
|
+
n_per_bin = 50
|
|
1337
|
+
else:
|
|
1338
|
+
raise RuntimeError(f"should not happen: {use=}")
|
|
1339
|
+
|
|
1283
1340
|
sel = hkldata.df[newlabels[0]].notna()
|
|
1284
1341
|
if use == "work":
|
|
1285
1342
|
sel &= hkldata.df.FREE != free
|
|
@@ -1292,8 +1349,6 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1292
1349
|
logger.writeln("n_per_bin={} requested for {}. n_bins set to {}".format(n_per_bin, use, n_bins))
|
|
1293
1350
|
|
|
1294
1351
|
hkldata.setup_binning(n_bins=n_bins)
|
|
1295
|
-
logger.writeln("Data completeness: {:.2f}%".format(hkldata.completeness()*100.))
|
|
1296
|
-
|
|
1297
1352
|
fc_labs = ["FC{}".format(i) for i, _ in enumerate(sts)]
|
|
1298
1353
|
|
|
1299
1354
|
# Create a centric selection table for faster look up
|
|
@@ -1343,6 +1398,14 @@ def process_input(hklin, labin, n_bins, free, xyzins, source, d_max=None, d_min=
|
|
|
1343
1398
|
stats.loc[i_bin, "n_test"] = n_test
|
|
1344
1399
|
|
|
1345
1400
|
stats["completeness"] = stats["n_obs"] / stats["n_all"] * 100
|
|
1401
|
+
logger.writeln("Data completeness: {:.2%}".format(stats["n_obs"].sum() / stats["n_all"].sum()))
|
|
1402
|
+
if ints_bak is not None:
|
|
1403
|
+
binner = gemmi.Binner()
|
|
1404
|
+
binner.setup(n_bins, gemmi.Binner.Method.Dstar2, ints_bak)
|
|
1405
|
+
bin_stats = ints_bak.calculate_merging_stats(binner, use_weights="X")
|
|
1406
|
+
stats["CC1/2"] = [stats.cc_half() for stats in bin_stats]
|
|
1407
|
+
hkldata.binned_df["CC*"] = numpy.sqrt(2 * stats["CC1/2"] / (1 + stats["CC1/2"]))
|
|
1408
|
+
|
|
1346
1409
|
logger.writeln(stats.to_string())
|
|
1347
1410
|
return hkldata, sts, fc_labs, centric_and_selections, free
|
|
1348
1411
|
# process_input()
|
|
@@ -1473,13 +1536,11 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
|
|
|
1473
1536
|
Sigma = 2 * SigFo**2 + epsilon * S
|
|
1474
1537
|
X = 2 * Fo * DFc_abs / Sigma
|
|
1475
1538
|
m = gemmi.bessel_i1_over_i0(X)
|
|
1476
|
-
hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
|
|
1477
1539
|
else:
|
|
1478
1540
|
Sigma = SigFo**2 + epsilon * S
|
|
1479
1541
|
X = Fo * DFc_abs / Sigma
|
|
1480
1542
|
m = numpy.tanh(X)
|
|
1481
|
-
|
|
1482
|
-
|
|
1543
|
+
hkldata.df.loc[cidxes, "FWT"] = (2 * m * Fo - DFc_abs) * expip
|
|
1483
1544
|
hkldata.df.loc[cidxes, "DELFWT"] = (m * Fo - DFc_abs) * expip
|
|
1484
1545
|
hkldata.df.loc[cidxes, "FOM"] = m
|
|
1485
1546
|
hkldata.df.loc[cidxes, "X"] = X
|
|
@@ -1500,7 +1561,8 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
|
|
|
1500
1561
|
Fc = hkldata.df.FC.to_numpy()[idxes] * k_ani[idxes]
|
|
1501
1562
|
Fo = hkldata.df.FP.to_numpy()[idxes]
|
|
1502
1563
|
mean_DFc2 = numpy.nanmean(numpy.abs((Ds[idxes,:] * Fcs[idxes,:]).sum(axis=1) * k_ani[idxes])**2)
|
|
1503
|
-
|
|
1564
|
+
with numpy.errstate(divide="ignore"):
|
|
1565
|
+
mean_log_DFcs = numpy.log(numpy.nanmean(numpy.abs(Ds[idxes,:] * Fcs[idxes,:] * k_ani[idxes,None]), axis=0)).tolist()
|
|
1504
1566
|
mean_Ds = numpy.nanmean(Ds[idxes,:], axis=0).tolist()
|
|
1505
1567
|
if sum(nrefs) > 0:
|
|
1506
1568
|
r = numpy.nansum(numpy.abs(numpy.abs(Fc)-Fo)) / numpy.nansum(Fo)
|
|
@@ -1535,7 +1597,6 @@ def calculate_maps(hkldata, b_aniso, centric_and_selections, fc_labs, D_labs, lo
|
|
|
1535
1597
|
# calculate_maps()
|
|
1536
1598
|
|
|
1537
1599
|
def main(args):
|
|
1538
|
-
n_per_bin = {"all": 500, "work": 500, "test": 50}[args.use]
|
|
1539
1600
|
try:
|
|
1540
1601
|
hkldata, sts, fc_labs, centric_and_selections,free = process_input(hklin=args.hklin,
|
|
1541
1602
|
labin=args.labin.split(",") if args.labin else None,
|
|
@@ -1545,16 +1606,17 @@ def main(args):
|
|
|
1545
1606
|
source=args.source,
|
|
1546
1607
|
d_max=args.d_max,
|
|
1547
1608
|
d_min=args.d_min,
|
|
1548
|
-
n_per_bin=n_per_bin,
|
|
1549
1609
|
use=args.use,
|
|
1550
1610
|
max_bins=30,
|
|
1551
1611
|
keep_charges=args.keep_charges,
|
|
1552
|
-
space_group=args.spacegroup
|
|
1612
|
+
space_group=args.spacegroup,
|
|
1613
|
+
hklin_free=args.hklin_free,
|
|
1614
|
+
labin_free=args.labin_free)
|
|
1553
1615
|
except RuntimeError as e:
|
|
1554
1616
|
raise SystemExit("Error: {}".format(e))
|
|
1555
1617
|
|
|
1556
1618
|
if args.twin:
|
|
1557
|
-
twin_data = find_twin_domains_from_data(hkldata)
|
|
1619
|
+
twin_data, _ = find_twin_domains_from_data(hkldata)
|
|
1558
1620
|
else:
|
|
1559
1621
|
twin_data = None
|
|
1560
1622
|
if twin_data:
|
|
@@ -1621,7 +1683,7 @@ def main(args):
|
|
|
1621
1683
|
if twin_data:
|
|
1622
1684
|
labs = ["F_est", "F_exp"]
|
|
1623
1685
|
elif is_int:
|
|
1624
|
-
labs = ["I", "SIGI"]
|
|
1686
|
+
labs = ["I", "SIGI", "F_est"]
|
|
1625
1687
|
else:
|
|
1626
1688
|
labs = ["FP", "SIGFP"]
|
|
1627
1689
|
labs.extend(["FOM", "FWT", "DELFWT", "FC", "DFC"])
|
servalcat/xtal/twin.py
CHANGED
|
@@ -21,39 +21,42 @@ def find_twin_domains_from_data(hkldata, max_oblique=5, min_alpha=0.05):
|
|
|
21
21
|
#for op in ops:
|
|
22
22
|
# logger.writeln(f" {op.triplet()}")
|
|
23
23
|
if not ops:
|
|
24
|
-
|
|
24
|
+
logger.writeln("")
|
|
25
|
+
return None, None
|
|
25
26
|
twin_data = ext.TwinData()
|
|
26
27
|
twin_data.setup(hkldata.miller_array(), hkldata.df.bin, hkldata.sg, hkldata.cell, ops)
|
|
27
28
|
if "I" in hkldata.df:
|
|
28
29
|
Io = hkldata.df.I.to_numpy()
|
|
29
30
|
else:
|
|
30
31
|
Io = hkldata.df.FP.to_numpy()**2
|
|
31
|
-
alphas = []
|
|
32
32
|
ccs, nums = [], []
|
|
33
|
+
tmp = []
|
|
33
34
|
for i_bin, bin_idxes in hkldata.binned():
|
|
34
35
|
ratios = [1.]
|
|
35
36
|
ccs.append([])
|
|
36
37
|
nums.append([])
|
|
38
|
+
rs = []
|
|
37
39
|
for i_op, op in enumerate(ops):
|
|
40
|
+
cc = r = numpy.nan
|
|
38
41
|
ii = numpy.array(twin_data.pairs(i_op, i_bin))
|
|
39
|
-
val = numpy.all(numpy.isfinite(Io[ii]), axis=1)
|
|
40
|
-
if numpy.sum(val)
|
|
41
|
-
cc = numpy.nan
|
|
42
|
-
else:
|
|
42
|
+
val = numpy.all(numpy.isfinite(Io[ii]), axis=1) if ii.size != 0 else []
|
|
43
|
+
if numpy.sum(val) != 0:
|
|
43
44
|
cc = numpy.corrcoef(Io[ii][val].T)[0,1]
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
r = numpy.sum(numpy.abs(Io[ii][val, 0] - Io[ii][val, 1])) / numpy.sum(Io[ii][val])
|
|
46
|
+
ratio = (1 - numpy.sqrt(1 - cc**2)) / cc
|
|
47
|
+
ratios.append(ratio)
|
|
46
48
|
ccs[-1].append(cc)
|
|
49
|
+
rs.append(r)
|
|
47
50
|
nums[-1].append(len(val))
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
+
tmp.append(rs + ccs[-1] + nums[-1] + (numpy.array(ratios) / numpy.nansum(ratios)).tolist()[1:])
|
|
52
|
+
df = pandas.DataFrame(tmp, columns=[f"{n}_op{i+1}" for n in ("R", "CC", "num", "raw_est") for i in range(len(ops))])
|
|
53
|
+
with logger.with_prefix(" "):
|
|
54
|
+
logger.writeln(df.to_string(float_format="%.4f"))
|
|
51
55
|
ccs = numpy.array(ccs)
|
|
52
56
|
nums = numpy.array(nums)
|
|
53
57
|
tmp = [{"Operator": gemmi.Op().triplet(),
|
|
54
58
|
"R_twin_obs": 0,
|
|
55
|
-
"CC_mean": 1
|
|
56
|
-
"Alpha_from_CC": alphas[0]}]
|
|
59
|
+
"CC_mean": 1}]
|
|
57
60
|
for i_op, op in enumerate(ops):
|
|
58
61
|
ii = numpy.array(twin_data.pairs(i_op))
|
|
59
62
|
val = numpy.all(numpy.isfinite(Io[ii]), axis=1)
|
|
@@ -61,61 +64,80 @@ def find_twin_domains_from_data(hkldata, max_oblique=5, min_alpha=0.05):
|
|
|
61
64
|
r_obs = numpy.nan
|
|
62
65
|
else:
|
|
63
66
|
r_obs = numpy.sum(numpy.abs(Io[ii][val, 0] - Io[ii][val, 1])) / numpy.sum(Io[ii][val])
|
|
67
|
+
cc = numpy.sum(nums[:,i_op] * ccs[:,i_op]) / numpy.sum(nums[:,i_op])
|
|
64
68
|
tmp.append({"Operator": op.triplet(),
|
|
65
|
-
"CC_mean":
|
|
69
|
+
"CC_mean": cc,
|
|
66
70
|
"R_twin_obs": r_obs,
|
|
67
|
-
"Alpha_from_CC": alphas[i_op+1],
|
|
68
71
|
})
|
|
69
72
|
df = pandas.DataFrame(tmp)
|
|
70
|
-
|
|
73
|
+
df["Alpha_from_CC"] = (1 - numpy.sqrt(1 - df["CC_mean"]**2)) / df["CC_mean"]
|
|
74
|
+
df["Alpha_from_CC"] /= numpy.nansum(df["Alpha_from_CC"])
|
|
75
|
+
logger.writeln("\n Initial twin fraction estimates:")
|
|
76
|
+
with logger.with_prefix(" "):
|
|
77
|
+
logger.writeln(df.to_string(float_format="%.2f"))
|
|
71
78
|
|
|
72
|
-
|
|
73
|
-
if
|
|
74
|
-
logger.writeln(" No twinning detected")
|
|
75
|
-
return
|
|
79
|
+
sel = df["Alpha_from_CC"].to_numpy() > min_alpha
|
|
80
|
+
if sel[1:].sum() == 0:
|
|
81
|
+
logger.writeln(" No twinning detected\n")
|
|
82
|
+
return None, None
|
|
76
83
|
|
|
77
|
-
if
|
|
78
|
-
ops = [ops[i
|
|
79
|
-
logger.writeln(" Twin operators after filtering small fractions")
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
logger.writeln(df.to_string(float_format="%.2f"))
|
|
84
|
+
if not sel.all():
|
|
85
|
+
ops = [ops[i] for i in range(len(ops)) if sel[i+1]]
|
|
86
|
+
logger.writeln(f"\n Twin operators after filtering small fractions (<= {min_alpha})")
|
|
87
|
+
df = df[sel]
|
|
88
|
+
df["Alpha_from_CC"] /= numpy.nansum(df["Alpha_from_CC"])
|
|
89
|
+
with logger.with_prefix(" "):
|
|
90
|
+
logger.writeln(df.to_string(float_format="%.2f"))
|
|
85
91
|
twin_data = ext.TwinData()
|
|
86
92
|
twin_data.setup(hkldata.miller_array(), hkldata.df.bin, hkldata.sg, hkldata.cell, ops)
|
|
87
|
-
twin_data.alphas =
|
|
93
|
+
twin_data.alphas = df["Alpha_from_CC"].tolist()
|
|
88
94
|
if "I" not in hkldata.df:
|
|
89
95
|
logger.writeln('Generating "observed" intensities for twin refinement: Io = Fo**2, SigIo = 2*F*SigFo')
|
|
90
96
|
hkldata.df["I"] = hkldata.df.FP**2
|
|
91
97
|
hkldata.df["SIGI"] = 2 * hkldata.df.FP * hkldata.df.SIGFP
|
|
92
|
-
|
|
98
|
+
logger.writeln("")
|
|
99
|
+
return twin_data, df
|
|
93
100
|
|
|
94
101
|
# find_twin_domains_from_data()
|
|
95
102
|
|
|
96
103
|
def estimate_twin_fractions_from_model(twin_data, hkldata):
|
|
97
104
|
logger.writeln("Estimating twin fractions")
|
|
98
105
|
Ic = numpy.abs(twin_data.f_calc.sum(axis=1))**2
|
|
99
|
-
|
|
106
|
+
idx_all = twin_data.twin_related(hkldata.sg)
|
|
107
|
+
Ic_all = Ic[idx_all]
|
|
108
|
+
Ic_all[(idx_all < 0).any(axis=1)] = numpy.nan
|
|
100
109
|
rr = twin_data.obs_related_asu()
|
|
101
110
|
tmp = []
|
|
111
|
+
P_list, cc_oc_list, weight_list = [], [], []
|
|
112
|
+
n_ops = len(twin_data.ops) + 1
|
|
113
|
+
tidxes = numpy.triu_indices(n_ops, 1)
|
|
102
114
|
for i_bin, bin_idxes in hkldata.binned():
|
|
103
|
-
cc_o_c = []
|
|
104
115
|
i_tmp = Ic_all[numpy.asarray(twin_data.bin)==i_bin,:]
|
|
116
|
+
i_tmp = i_tmp[numpy.isfinite(i_tmp).all(axis=1)]
|
|
105
117
|
P = numpy.corrcoef(i_tmp.T)
|
|
106
118
|
iobs = hkldata.df.I.to_numpy()[bin_idxes]
|
|
107
119
|
ic_bin = Ic[rr[bin_idxes,:]]
|
|
108
|
-
val = numpy.isfinite(iobs) & numpy.isfinite(ic_bin).all(axis=1)
|
|
120
|
+
val = numpy.isfinite(iobs) & numpy.isfinite(ic_bin).all(axis=1) & numpy.all(rr[bin_idxes,:]>=0, axis=1)
|
|
109
121
|
iobs, ic_bin = iobs[val], ic_bin[val,:]
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
mean_alphas /= numpy.sum(mean_alphas)
|
|
118
|
-
logger.write(" Estimated fractions from data-model correlations: ")
|
|
119
|
-
logger.writeln(" ".join("%.2f"%x for x in mean_alphas))
|
|
120
|
-
twin_data.alphas = mean_alphas
|
|
122
|
+
cc_oc = [numpy.corrcoef(iobs, ic_bin[:,i])[0,1] for i in range(n_ops)]
|
|
123
|
+
P_list.append(P)
|
|
124
|
+
cc_oc_list.append(cc_oc)
|
|
125
|
+
weight_list.append(numpy.sum(val))
|
|
126
|
+
frac_est = numpy.dot(numpy.linalg.pinv(P), cc_oc)
|
|
127
|
+
frac_est /= frac_est.sum()
|
|
128
|
+
tmp.append(P[tidxes].tolist() + cc_oc + [weight_list[-1]] + frac_est.tolist())
|
|
121
129
|
|
|
130
|
+
P = numpy.average(P_list, axis=0, weights=weight_list)
|
|
131
|
+
cc_oc = numpy.average(cc_oc_list, axis=0, weights=weight_list)
|
|
132
|
+
frac_est = numpy.dot(numpy.linalg.pinv(P), cc_oc)
|
|
133
|
+
frac_est = numpy.maximum(0, frac_est)
|
|
134
|
+
frac_est /= frac_est.sum()
|
|
135
|
+
df = pandas.DataFrame(tmp, columns=[f"cc_{i+1}_{j+1}" for i, j in zip(*tidxes)] +
|
|
136
|
+
[f"cc_o_{i+1}" for i in range(n_ops)] +
|
|
137
|
+
["nref"] + [f"raw_est_{i+1}" for i in range(n_ops)])
|
|
138
|
+
with logger.with_prefix(" "):
|
|
139
|
+
logger.writeln(df.to_string(float_format="%.4f"))
|
|
140
|
+
logger.write(" Final twin fraction estimate: ")
|
|
141
|
+
logger.writeln(" ".join("%.2f"%x for x in frac_est))
|
|
142
|
+
twin_data.alphas = frac_est
|
|
143
|
+
return df
|