servalcat 0.4.88__cp313-cp313-macosx_11_0_arm64.whl → 0.4.100__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +2 -2
- servalcat/ext.cpython-313-darwin.so +0 -0
- servalcat/refine/refine.py +125 -42
- servalcat/refine/refine_geom.py +24 -11
- servalcat/refine/refine_spa.py +55 -31
- servalcat/refine/refine_xtal.py +31 -22
- servalcat/refine/spa.py +12 -4
- servalcat/refine/xtal.py +15 -12
- servalcat/refmac/refmac_wrapper.py +3 -11
- servalcat/spa/fofc.py +9 -3
- servalcat/spa/fsc.py +8 -10
- servalcat/spa/run_refmac.py +16 -11
- servalcat/spa/translate.py +2 -2
- servalcat/utils/commands.py +154 -4
- servalcat/utils/fileio.py +15 -8
- servalcat/utils/hkl.py +63 -26
- servalcat/utils/logger.py +25 -1
- servalcat/utils/maps.py +2 -2
- servalcat/utils/model.py +22 -15
- servalcat/utils/refmac.py +1 -1
- servalcat/utils/restraints.py +27 -28
- servalcat/utils/symmetry.py +5 -5
- servalcat/xtal/french_wilson.py +7 -5
- servalcat/xtal/sigmaa.py +69 -45
- servalcat/xtal/twin.py +73 -44
- {servalcat-0.4.88.dist-info → servalcat-0.4.100.dist-info}/METADATA +4 -4
- servalcat-0.4.100.dist-info/RECORD +45 -0
- {servalcat-0.4.88.dist-info → servalcat-0.4.100.dist-info}/WHEEL +1 -1
- servalcat-0.4.88.dist-info/RECORD +0 -45
- {servalcat-0.4.88.dist-info → servalcat-0.4.100.dist-info}/entry_points.txt +0 -0
- {servalcat-0.4.88.dist-info → servalcat-0.4.100.dist-info}/licenses/LICENSE +0 -0
servalcat/utils/hkl.py
CHANGED
|
@@ -7,7 +7,6 @@ Mozilla Public License, version 2.0; see LICENSE.
|
|
|
7
7
|
"""
|
|
8
8
|
from __future__ import absolute_import, division, print_function, generators
|
|
9
9
|
import numpy
|
|
10
|
-
import numpy.lib.recfunctions
|
|
11
10
|
import scipy.optimize
|
|
12
11
|
import pandas
|
|
13
12
|
import gemmi
|
|
@@ -17,27 +16,28 @@ dtypes64 = dict(i=numpy.int64, u=numpy.uint64, f=numpy.float64, c=numpy.complex1
|
|
|
17
16
|
to64 = lambda x: x.astype(dtypes64.get(x.dtype.kind, x.dtype))
|
|
18
17
|
|
|
19
18
|
def r_factor(fo, fc):
    """Return the R factor sum(|fo - fc|) / sum(fo).

    Both sums run over the same reflections: a pair is skipped when
    either fo or fc is NaN. (Previously the denominator still counted fo
    for pairs whose fc was NaN, while the numerator skipped them.)

    Returns numpy.nan when the denominator is zero, which includes empty
    input and input without any usable pair.
    """
    diff = numpy.abs(fo - fc)
    # NaN in diff marks a pair where fo or fc is missing; drop fo there too
    # so that numerator and denominator cover identical reflections.
    denom = numpy.nansum(numpy.where(numpy.isnan(diff), numpy.nan, fo))
    if denom == 0:
        return numpy.nan
    return numpy.nansum(diff) / denom
|
|
23
23
|
def correlation(obs, calc):
    """Return the Pearson correlation coefficient between obs and calc.

    Only pairs where both obs and calc are finite are used. Returns
    numpy.nan when fewer than two such pairs exist (this covers empty
    input and input where every observation is missing), because a
    correlation is undefined for less than two points.
    """
    sel = numpy.isfinite(obs) & numpy.isfinite(calc)
    if numpy.count_nonzero(sel) < 2:
        return numpy.nan
    return numpy.corrcoef(obs[sel], calc[sel])[0,1]
|
|
28
28
|
|
|
29
29
|
def df_from_asu_data(asu_data, label):
|
|
30
|
-
df = pandas.DataFrame(data=asu_data.miller_array,
|
|
30
|
+
df = pandas.DataFrame(data=asu_data.miller_array.astype(numpy.int32),
|
|
31
31
|
columns=["H","K","L"])
|
|
32
|
-
if asu_data
|
|
33
|
-
df[label] = to64(asu_data.value_array[
|
|
34
|
-
df["SIG"+label] = to64(asu_data.value_array[
|
|
32
|
+
if type(asu_data) is gemmi.ValueSigmaAsuData:
|
|
33
|
+
df[label] = to64(asu_data.value_array[:,0])
|
|
34
|
+
df["SIG"+label] = to64(asu_data.value_array[:,1])
|
|
35
35
|
else:
|
|
36
36
|
df[label] = to64(asu_data.value_array)
|
|
37
37
|
return df
|
|
38
38
|
|
|
39
39
|
def df_from_raw(miller_array, value_array, label):
|
|
40
|
-
df = pandas.DataFrame(data=miller_array,
|
|
40
|
+
df = pandas.DataFrame(data=miller_array.astype(numpy.int32),
|
|
41
41
|
columns=["H","K","L"])
|
|
42
42
|
df[label] = to64(value_array)
|
|
43
43
|
return df
|
|
@@ -93,7 +93,7 @@ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
|
|
|
93
93
|
if mismatches:
|
|
94
94
|
raise RuntimeError("MTZ column types mismatch: {}".format(" ".join(mismatches)))
|
|
95
95
|
|
|
96
|
-
df = pandas.DataFrame(data=
|
|
96
|
+
df = pandas.DataFrame(data=mtz.array, columns=mtz.column_labels())
|
|
97
97
|
df = df.astype({col: 'int32' for col in col_types if col_types[col] == "H"})
|
|
98
98
|
df = df.astype({col: 'Int64' for col in col_types if col_types[col] in ("B", "Y", "I")}) # pandas's nullable int
|
|
99
99
|
for lab in set(mtz.column_labels()).difference(labels+["H","K","L"]):
|
|
@@ -176,7 +176,7 @@ def mtz_selected(mtz, columns):
|
|
|
176
176
|
dataset_id=col_dict[col].dataset_id, expand_data=False)
|
|
177
177
|
|
|
178
178
|
idxes = [col_idxes[col] for col in columns]
|
|
179
|
-
data =
|
|
179
|
+
data = mtz.array[:, idxes]
|
|
180
180
|
mtz2.set_data(data)
|
|
181
181
|
return mtz2
|
|
182
182
|
# mtz_selected()
|
|
@@ -199,6 +199,8 @@ def decide_n_bins(n_per_bin, s_array, power=2, min_bins=1, max_bins=50):
|
|
|
199
199
|
def fft_map(cell, sg, miller_array, data, grid_size=None, sample_rate=3):
|
|
200
200
|
if data is not None:
|
|
201
201
|
data = data.astype(numpy.complex64) # we may want to keep complex128?
|
|
202
|
+
if type(data) is pandas.core.series.Series:
|
|
203
|
+
data = data.to_numpy()
|
|
202
204
|
asu = gemmi.ComplexAsuData(cell, sg, miller_array, data)
|
|
203
205
|
if grid_size is None:
|
|
204
206
|
ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
|
|
@@ -223,7 +225,7 @@ class HklData:
|
|
|
223
225
|
def switch_to_asu(self):
|
|
224
226
|
# Need to care phases
|
|
225
227
|
assert not any(numpy.iscomplexobj(self.df[x]) for x in self.df)
|
|
226
|
-
hkl = self.miller_array()
|
|
228
|
+
hkl = self.miller_array()
|
|
227
229
|
self.sg.switch_to_asu(hkl)
|
|
228
230
|
self.df[["H","K","L"]] = hkl
|
|
229
231
|
# in some environment type changes to int64 even though hkl's dtype is int32
|
|
@@ -265,11 +267,11 @@ class HklData:
|
|
|
265
267
|
# merge_asu_data()
|
|
266
268
|
|
|
267
269
|
def miller_array(self):
|
|
268
|
-
return self.df[["H","K","L"]]
|
|
270
|
+
return self.df[["H","K","L"]].to_numpy()
|
|
269
271
|
|
|
270
272
|
def s_array(self):
|
|
271
273
|
hkl = self.miller_array()
|
|
272
|
-
return numpy.dot(hkl, self.cell.
|
|
274
|
+
return numpy.dot(hkl, self.cell.frac.mat.array)
|
|
273
275
|
|
|
274
276
|
def ssq_mat(self):
|
|
275
277
|
# k_aniso = exp(-s^T B_aniso s / 4)
|
|
@@ -288,8 +290,8 @@ class HklData:
|
|
|
288
290
|
s2 = 1 / self.d_spacings()**2
|
|
289
291
|
return numpy.exp(-b_iso / 4 * s2)
|
|
290
292
|
if b_cart is not None:
|
|
291
|
-
b_star = b_cart.transformed_by(self.cell.
|
|
292
|
-
return numpy.exp(-b_star.r_u_r(self.miller_array()
|
|
293
|
+
b_star = b_cart.transformed_by(self.cell.frac.mat)
|
|
294
|
+
return numpy.exp(-b_star.r_u_r(self.miller_array()) / 4)
|
|
293
295
|
|
|
294
296
|
def calc_d(self):
|
|
295
297
|
self.df["d"] = self.cell.calculate_d_array(self.miller_array())
|
|
@@ -314,8 +316,10 @@ class HklData:
|
|
|
314
316
|
self.df.sort_values("d", ascending=ascending, inplace=True)
|
|
315
317
|
# sort_by_resolution()
|
|
316
318
|
|
|
317
|
-
def d_min_max(self):
|
|
319
|
+
def d_min_max(self, labs=None):
|
|
318
320
|
d = self.d_spacings()
|
|
321
|
+
if labs:
|
|
322
|
+
d = d[~self.df[labs].isna().any(axis=1)]
|
|
319
323
|
return numpy.min(d), numpy.max(d)
|
|
320
324
|
# d_min_max()
|
|
321
325
|
|
|
@@ -489,6 +493,31 @@ class HklData:
|
|
|
489
493
|
self.df = self.df[~sel]
|
|
490
494
|
# remove_nonpositive()
|
|
491
495
|
|
|
496
|
+
def mask_invalid_obs_values(self, labels):
|
|
497
|
+
assert 1 < len(labels) < 6
|
|
498
|
+
assert labels[1].startswith("SIG")
|
|
499
|
+
def do_mask(label, target_labels):
|
|
500
|
+
sel = self.df[label] <= 0
|
|
501
|
+
n_bad = sel.sum()
|
|
502
|
+
if n_bad > 0:
|
|
503
|
+
logger.writeln("Removing {} reflections with {}<=0".format(n_bad, label))
|
|
504
|
+
self.df.loc[sel, target_labels] = numpy.nan
|
|
505
|
+
# If any element within target_labels is non-finite, mask all elements
|
|
506
|
+
self.df.loc[(~numpy.isfinite(self.df[target_labels])).any(axis=1), target_labels] = numpy.nan
|
|
507
|
+
|
|
508
|
+
if len(labels) < 4: # F/SIGF or I/SIGI
|
|
509
|
+
if labels[0].startswith("F"):
|
|
510
|
+
do_mask(labels[0], labels[:2]) # bad F
|
|
511
|
+
do_mask(labels[1], labels[:2]) # bad sigma
|
|
512
|
+
else: # I(+)/SIGI(+)/I(-)/SIGI(-) or F...
|
|
513
|
+
assert labels[3].startswith("SIG")
|
|
514
|
+
if labels[0].startswith("F"):
|
|
515
|
+
do_mask(labels[0], labels[:2]) # bad F+
|
|
516
|
+
do_mask(labels[2], labels[2:4]) # bad F-
|
|
517
|
+
do_mask(labels[1], labels[:2]) # bad sigma+
|
|
518
|
+
do_mask(labels[3], labels[2:4]) # bad sigma-
|
|
519
|
+
# mask_invalid_obs_values()
|
|
520
|
+
|
|
492
521
|
def remove_systematic_absences(self):
|
|
493
522
|
is_absent = self.sg.operations().systematic_absences(self.miller_array())
|
|
494
523
|
n_absent = numpy.sum(is_absent)
|
|
@@ -497,12 +526,22 @@ class HklData:
|
|
|
497
526
|
self.df = self.df[~is_absent]
|
|
498
527
|
# remove_systematic_absences()
|
|
499
528
|
|
|
500
|
-
def merge_anomalous(self, labs, newlabs):
|
|
529
|
+
def merge_anomalous(self, labs, newlabs, method="weighted"):
    """Merge the two anomalous observations of each reflection into one.

    labs: four column labels of self.df in the order value(+), sigma(+),
          value(-), sigma(-) (i+,sigi+,i-,sigi- for example).
    newlabs: two column labels that receive the merged value and its sigma.
    method: "simple" takes the plain mean of the values and the root mean
            square of the sigmas; "weighted" (default) takes the
            1/sigma^2-weighted mean and sqrt(1/sum of weights).

    Rows where nothing can be merged get NaN in both new columns.
    """
    assert method in ("weighted", "simple")
    assert len(labs) == 4 # i+,sigi+,i-,sigi- for example
    assert len(newlabs) == 2
    if method == "simple":
        # skipna=True is default, so missing value is handled nicely.
        self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
        self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
    else:
        obs = self.df[[labs[0], labs[2]]].to_numpy()
        weights = 1. / self.df[[labs[1], labs[3]]].to_numpy()**2
        # An observation without a finite value must not contribute its weight;
        # otherwise sum_w counts a mate that the numerator silently skips,
        # which biases the merged value and underestimates its sigma.
        weights = numpy.where(numpy.isfinite(obs), weights, numpy.nan)
        sum_w = numpy.nansum(weights, axis=1)
        sum_w[sum_w == 0] = numpy.nan # mask when both are nan
        self.df[newlabs[0]] = numpy.nansum(obs * weights, axis=1) / sum_w
        self.df[newlabs[1]] = numpy.sqrt(1. / sum_w)
# merge_anomalous()
|
|
506
545
|
|
|
507
546
|
def as_asu_data(self, label=None, data=None, label_sigma=None):
|
|
508
547
|
if label is None: assert data is not None
|
|
@@ -511,9 +550,7 @@ class HklData:
|
|
|
511
550
|
if label_sigma is not None:
|
|
512
551
|
assert data is None
|
|
513
552
|
assert not numpy.iscomplexobj(self.df[label])
|
|
514
|
-
|
|
515
|
-
data = numpy.lib.recfunctions.unstructured_to_structured(self.df[[label,label_sigma]].to_numpy(),
|
|
516
|
-
numpy.dtype([("value", numpy.float32), ("sigma", numpy.float32)]))
|
|
553
|
+
data = self.df[[label,label_sigma]].to_numpy()
|
|
517
554
|
elif data is None:
|
|
518
555
|
data = self.df[label]
|
|
519
556
|
|
|
@@ -532,7 +569,7 @@ class HklData:
|
|
|
532
569
|
|
|
533
570
|
def fft_map(self, label=None, data=None, grid_size=None, sample_rate=3):
|
|
534
571
|
if data is None:
|
|
535
|
-
data = self.df[label]
|
|
572
|
+
data = self.df[label].to_numpy()
|
|
536
573
|
return fft_map(self.cell, self.sg, self.miller_array(), data, grid_size, sample_rate)
|
|
537
574
|
# fft_map()
|
|
538
575
|
|
servalcat/utils/logger.py
CHANGED
|
@@ -18,12 +18,15 @@ class Logger(object):
|
|
|
18
18
|
def __init__(self, file_out=None, append=True):
|
|
19
19
|
self.ofs = None
|
|
20
20
|
self.stopped = False
|
|
21
|
+
self.prefix = ""
|
|
21
22
|
if file_out:
|
|
22
23
|
self.set_file(file_out, append)
|
|
23
24
|
# __init__()
|
|
24
25
|
def stop_logging(self): self.stopped = True
|
|
25
26
|
def start_logging(self): self.stopped = False
|
|
26
|
-
|
|
27
|
+
def set_prefix(self, p): self.prefix = p
|
|
28
|
+
def clear_prefix(self): self.prefix = ""
|
|
29
|
+
|
|
27
30
|
def set_file(self, file_out, append=True):
|
|
28
31
|
try:
|
|
29
32
|
self.ofs = open(file_out, "a" if append else "w")
|
|
@@ -33,6 +36,8 @@ class Logger(object):
|
|
|
33
36
|
|
|
34
37
|
def write(self, l, end="", flush=True, fs=None, print_fs=sys.stdout):
|
|
35
38
|
if self.stopped: return
|
|
39
|
+
if self.prefix:
|
|
40
|
+
l = "".join(self.prefix + x for x in l.splitlines(keepends=True))
|
|
36
41
|
print(l, end=end, file=print_fs, flush=flush)
|
|
37
42
|
for f in (self.ofs, fs):
|
|
38
43
|
if f is not None:
|
|
@@ -69,6 +74,25 @@ close = _logger.close
|
|
|
69
74
|
flush = _logger.flush
|
|
70
75
|
stop = _logger.stop_logging
|
|
71
76
|
start = _logger.start_logging
|
|
77
|
+
set_prefix = _logger.set_prefix
|
|
78
|
+
clear_prefix = _logger.clear_prefix
|
|
79
|
+
|
|
80
|
+
def with_prefix(prefix):
    """Return a context manager that sets the module logger's prefix to *prefix*.

    Entering the context returns the module-level logger. On exit the
    prefix that was active before entering is put back (instead of being
    unconditionally cleared), so contexts can be nested and a prefix set
    earlier with set_prefix() survives the block.
    """
    class WithPrefix(object):
        def __enter__(self):
            # remember whatever prefix was active so that it can be restored
            self.saved_prefix = _logger.prefix
            _logger.set_prefix(prefix)
            return _logger
        def __exit__(self, exc_type, exc_val, exc_tb):
            # returns None, so an exception raised inside the block propagates
            _logger.set_prefix(self.saved_prefix)
    return WithPrefix()
|
|
88
|
+
|
|
89
|
+
def silent():
|
|
90
|
+
class Silent(object):
|
|
91
|
+
def write(self, *args, **kwargs):
|
|
92
|
+
pass
|
|
93
|
+
def flush(self):
|
|
94
|
+
pass
|
|
95
|
+
return Silent()
|
|
72
96
|
|
|
73
97
|
def dependency_versions():
|
|
74
98
|
import gemmi
|
servalcat/utils/maps.py
CHANGED
|
@@ -268,9 +268,9 @@ def optimize_peak(grid, ini_pos):
|
|
|
268
268
|
logger.writeln("Finding peak using interpolation..")
|
|
269
269
|
x = grid.unit_cell.fractionalize(ini_pos)
|
|
270
270
|
logger.writeln(" x0: [{}, {}, {}]".format(*x.tolist()))
|
|
271
|
-
logger.writeln(" f0: {}".format(-grid.
|
|
271
|
+
logger.writeln(" f0: {}".format(-grid.interpolate_value(x, order=3)))
|
|
272
272
|
|
|
273
|
-
res = scipy.optimize.minimize(fun=lambda x:-grid.
|
|
273
|
+
res = scipy.optimize.minimize(fun=lambda x:-grid.interpolate_value(gemmi.Fractional(*x), order=3),
|
|
274
274
|
x0=x.tolist(),
|
|
275
275
|
jac=lambda x:-numpy.array(grid.tricubic_interpolation_der(gemmi.Fractional(*x))[1:])
|
|
276
276
|
)
|
servalcat/utils/model.py
CHANGED
|
@@ -73,11 +73,12 @@ def remove_charge(sts):
|
|
|
73
73
|
def check_atomsf(sts, source, mott_bethe=True):
|
|
74
74
|
assert source in ("xray", "electron", "neutron")
|
|
75
75
|
if source != "electron": mott_bethe = False
|
|
76
|
-
logger.writeln("Atomic scattering factors for {}".format("
|
|
76
|
+
logger.writeln("Atomic scattering factors for {}".format("xray (use Mott-Bethe to convert to electrons)" if mott_bethe else source))
|
|
77
77
|
if source != "xray" and not mott_bethe:
|
|
78
78
|
logger.writeln(" Note that charges will be ignored")
|
|
79
79
|
el_charges = {(cra.atom.element, cra.atom.charge) for st in sts for cra in st[0].all()}
|
|
80
80
|
elems = {x[0] for x in el_charges}
|
|
81
|
+
tmp = {}
|
|
81
82
|
if source == "xray" or mott_bethe:
|
|
82
83
|
shown = set()
|
|
83
84
|
for el, charge in sorted(el_charges, key=lambda x: (x[0].atomic_number, x[1])):
|
|
@@ -88,12 +89,16 @@ def check_atomsf(sts, source, mott_bethe=True):
|
|
|
88
89
|
charge = 0
|
|
89
90
|
if (el, charge) in shown: continue
|
|
90
91
|
label = el.name if charge == 0 else "{}{:+}".format(el.name, charge)
|
|
91
|
-
logger.writeln(" {} {}".format(label, tuple(sf.get_coefs())))
|
|
92
92
|
shown.add((el, charge))
|
|
93
|
+
tmp[label] = {**{f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(sf, k))}, "c": sf.c}
|
|
93
94
|
else:
|
|
94
95
|
for el in sorted(elems, key=lambda x: x.atomic_number):
|
|
95
|
-
|
|
96
|
-
|
|
96
|
+
if source == "electron":
|
|
97
|
+
tmp[el.name] = {f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(el.c4322, k))}
|
|
98
|
+
else:
|
|
99
|
+
tmp[el.name] = {"a": el.neutron92.get_coefs()[0]}
|
|
100
|
+
with logger.with_prefix(" "):
|
|
101
|
+
logger.writeln(pandas.DataFrame(tmp).T.to_string())
|
|
97
102
|
logger.writeln("")
|
|
98
103
|
# check_atomsf()
|
|
99
104
|
|
|
@@ -347,8 +352,8 @@ def translate_into_box(st, origin=None, apply_shift=True):
|
|
|
347
352
|
if origin is None: origin = gemmi.Position(0,0,0)
|
|
348
353
|
|
|
349
354
|
# apply unit cell translations to put model into a box (unit cell)
|
|
350
|
-
omat =
|
|
351
|
-
fmat =
|
|
355
|
+
omat = st.cell.orth.mat.array
|
|
356
|
+
fmat = st.cell.frac.mat.array.transpose()
|
|
352
357
|
com = numpy.array((st[0].calculate_center_of_mass() - origin).tolist())
|
|
353
358
|
shift = sum([omat[:,i]*numpy.floor(1-numpy.dot(com, fmat[:,i])) for i in range(3)])
|
|
354
359
|
tr = gemmi.Transform(gemmi.Mat33(), gemmi.Vec3(*shift))
|
|
@@ -443,7 +448,7 @@ def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=
|
|
|
443
448
|
logger.writeln(" correcting aniso= {}".format(tostr(atom.aniso.elements_pdb())))
|
|
444
449
|
logger.writeln(" aniso_viol= {}".format(tostr(diff)))
|
|
445
450
|
|
|
446
|
-
mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat for i in images]
|
|
451
|
+
mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat.array for i in images]
|
|
447
452
|
mat_total = (numpy.identity(3) + sum(numpy.array(m) for m in mats)) / n_images
|
|
448
453
|
mat_total_aniso = (numpy.identity(6) + sum(mat33_as66(m.tolist()) for m in mats)) / n_images
|
|
449
454
|
mat_total_aniso = numpy.linalg.pinv(mat_total_aniso)
|
|
@@ -553,7 +558,9 @@ def reset_adp(model, bfactor=None, adp_mode="iso"):
|
|
|
553
558
|
if adp_mode == "iso" or (adp_mode == "fix" and bfactor is not None):
|
|
554
559
|
cra.atom.aniso = gemmi.SMat33f(0,0,0,0,0,0)
|
|
555
560
|
elif adp_mode == "aniso":
|
|
556
|
-
if
|
|
561
|
+
if cra.atom.aniso.nonzero() and bfactor is None: # just in case
|
|
562
|
+
cra.atom.b_iso = numpy.mean(cra.atom.aniso.calculate_eigenvalues()) * u_to_b
|
|
563
|
+
else:
|
|
557
564
|
u = cra.atom.b_iso * b_to_u
|
|
558
565
|
cra.atom.aniso = gemmi.SMat33f(u, u, u, 0, 0, 0)
|
|
559
566
|
# reset_adp()
|
|
@@ -630,7 +637,7 @@ def to_dataframe(st):
|
|
|
630
637
|
for cra in m.all():
|
|
631
638
|
c,r,a = cra.chain, cra.residue, cra.atom
|
|
632
639
|
# TODO need support r.het_flag, r.flag, a.calc_flag, a.flag, a.serial?
|
|
633
|
-
app("model", m.
|
|
640
|
+
app("model", m.num)
|
|
634
641
|
app("chain", c.name)
|
|
635
642
|
app("resn", r.name)
|
|
636
643
|
app("subchain", r.subchain)
|
|
@@ -665,8 +672,8 @@ def from_dataframe(df, st=None): # Slow!
|
|
|
665
672
|
for i in range(len(st)):
|
|
666
673
|
del st[0]
|
|
667
674
|
|
|
668
|
-
for
|
|
669
|
-
st.add_model(gemmi.Model(
|
|
675
|
+
for m_num, dm in df.groupby("model"):
|
|
676
|
+
st.add_model(gemmi.Model(m_num))
|
|
670
677
|
m = st[-1]
|
|
671
678
|
for c_name, dc in dm.groupby("chain"):
|
|
672
679
|
m.add_chain(gemmi.Chain(c_name))
|
|
@@ -704,7 +711,7 @@ def from_dataframe(df, st=None): # Slow!
|
|
|
704
711
|
|
|
705
712
|
def st_from_positions(positions, bs=None, qs=None):
|
|
706
713
|
st = gemmi.Structure()
|
|
707
|
-
st.add_model(gemmi.Model(
|
|
714
|
+
st.add_model(gemmi.Model(1))
|
|
708
715
|
st[0].add_chain(gemmi.Chain("A"))
|
|
709
716
|
c = st[0][0]
|
|
710
717
|
if bs is None: bs = (0. for _ in range(len(positions)))
|
|
@@ -727,7 +734,7 @@ def st_from_positions(positions, bs=None, qs=None):
|
|
|
727
734
|
|
|
728
735
|
def invert_model(st):
|
|
729
736
|
# invert x-axis
|
|
730
|
-
A =
|
|
737
|
+
A = st.cell.orth.mat.array
|
|
731
738
|
center = numpy.sum(A,axis=1) / 2
|
|
732
739
|
center = gemmi.Vec3(*center)
|
|
733
740
|
mat = gemmi.Mat33([[-1,0,0],[0,1,0],[0,0,1]])
|
|
@@ -742,14 +749,14 @@ def cx_to_mx(ss): #SmallStructure to Structure
|
|
|
742
749
|
st = gemmi.Structure()
|
|
743
750
|
st.spacegroup_hm = ss.spacegroup.xhm()
|
|
744
751
|
st.cell = ss.cell
|
|
745
|
-
st.add_model(gemmi.Model(
|
|
752
|
+
st.add_model(gemmi.Model(1))
|
|
746
753
|
st[-1].add_chain(gemmi.Chain("A"))
|
|
747
754
|
st[-1][-1].add_residue(gemmi.Residue())
|
|
748
755
|
st[-1][-1][-1].seqid.num = 1
|
|
749
756
|
st[-1][-1][-1].name = "00"
|
|
750
757
|
|
|
751
758
|
ruc = ss.cell.reciprocal()
|
|
752
|
-
cif2cart = ss.cell.
|
|
759
|
+
cif2cart = ss.cell.orth.mat.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))
|
|
753
760
|
as_smat33f = lambda x: gemmi.SMat33f(x.u11, x.u22, x.u33, x.u12, x.u13, x.u23)
|
|
754
761
|
|
|
755
762
|
for site in ss.sites:
|
servalcat/utils/refmac.py
CHANGED
|
@@ -22,7 +22,7 @@ from servalcat.utils import fileio
|
|
|
22
22
|
|
|
23
23
|
re_version = re.compile("#.* Refmac *version ([^ ]+) ")
|
|
24
24
|
re_error = re.compile('(warn|error *[:]|error *==|^error)', re.IGNORECASE)
|
|
25
|
-
re_outlier_start = re.compile("\*\*\*\*.*outliers")
|
|
25
|
+
re_outlier_start = re.compile(r"\*\*\*\*.*outliers")
|
|
26
26
|
|
|
27
27
|
def check_version(exe="refmac5"):
|
|
28
28
|
ver = ()
|
servalcat/utils/restraints.py
CHANGED
|
@@ -10,7 +10,6 @@ from servalcat.utils import logger
|
|
|
10
10
|
from servalcat.refmac import refmac_keywords
|
|
11
11
|
from servalcat import ext
|
|
12
12
|
import os
|
|
13
|
-
import io
|
|
14
13
|
import gemmi
|
|
15
14
|
import string
|
|
16
15
|
import random
|
|
@@ -87,14 +86,13 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
|
|
|
87
86
|
if cif_files is None:
|
|
88
87
|
cif_files = []
|
|
89
88
|
|
|
89
|
+
monlib = gemmi.MonLib()
|
|
90
90
|
if monomer_dir and not ignore_monomer_dir:
|
|
91
91
|
if not os.path.isdir(monomer_dir):
|
|
92
92
|
raise RuntimeError("not a directory: {}".format(monomer_dir))
|
|
93
93
|
|
|
94
94
|
logger.writeln("Reading monomers from {}".format(monomer_dir))
|
|
95
|
-
monlib
|
|
96
|
-
else:
|
|
97
|
-
monlib = gemmi.MonLib()
|
|
95
|
+
monlib.read_monomer_lib(monomer_dir, resnames, logger)
|
|
98
96
|
|
|
99
97
|
for f in cif_files:
|
|
100
98
|
logger.writeln("Reading monomer: {}".format(f))
|
|
@@ -109,8 +107,8 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
|
|
|
109
107
|
|
|
110
108
|
# Check if bond length values are included
|
|
111
109
|
# This is to fail if cif file is e.g. from PDB website
|
|
112
|
-
if
|
|
113
|
-
raise RuntimeError("
|
|
110
|
+
if b.find_values("_chem_comp_bond.comp_id") and not b.find_values("_chem_comp_bond.value_dist"):
|
|
111
|
+
raise RuntimeError(f"Bond length information for {name} is missing from {f}. Please generate restraints using a tool like acedrg.")
|
|
114
112
|
|
|
115
113
|
for row in b.find("_chem_link.", ["id"]):
|
|
116
114
|
link_id = row.str(0)
|
|
@@ -148,7 +146,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
|
|
|
148
146
|
logger.writeln(" it is strongly recommended to generate them using AceDRG.")
|
|
149
147
|
|
|
150
148
|
if update_old_atom_names:
|
|
151
|
-
|
|
149
|
+
monlib.update_old_atom_names(st, logger)
|
|
152
150
|
|
|
153
151
|
if params:
|
|
154
152
|
update_torsions(monlib, params.get("restr", {}).get("torsion_include", {}))
|
|
@@ -158,6 +156,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
|
|
|
158
156
|
|
|
159
157
|
def fix_elements_in_model(monlib, st):
|
|
160
158
|
monlib_els = {m: {a.id: a.el for a in monlib.monomers[m].atoms} for m in monlib.monomers}
|
|
159
|
+
lookup = {x.atom: x for x in st[0].all()}
|
|
161
160
|
for chain in st[0]:
|
|
162
161
|
for res in chain:
|
|
163
162
|
d = monlib_els.get(res.name)
|
|
@@ -167,7 +166,7 @@ def fix_elements_in_model(monlib, st):
|
|
|
167
166
|
continue
|
|
168
167
|
el = d[at.name]
|
|
169
168
|
if at.element != el:
|
|
170
|
-
logger.writeln(f"WARNING: correcting element of {
|
|
169
|
+
logger.writeln(f"WARNING: correcting element of {lookup[at]} to {el.name}")
|
|
171
170
|
at.element = el
|
|
172
171
|
# correct_elements_in_model()
|
|
173
172
|
|
|
@@ -334,10 +333,9 @@ def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_err
|
|
|
334
333
|
keywords = []
|
|
335
334
|
# these checks can be done after sorting links
|
|
336
335
|
logger.writeln("Creating restraints..")
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
for l in sio.getvalue().splitlines(): logger.writeln(" " + l)
|
|
336
|
+
with logger.with_prefix(" "):
|
|
337
|
+
topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=logger, reorder=False,
|
|
338
|
+
ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
|
|
341
339
|
unknown_cc = set()
|
|
342
340
|
link_related = set()
|
|
343
341
|
nan_hydr = set()
|
|
@@ -665,34 +663,35 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
|
|
|
665
663
|
q = [x.name for x in pols[i][1]]
|
|
666
664
|
for j in range(i+1, len(pols)):
|
|
667
665
|
al = gemmi.align_sequence_to_polymer(q, pols[j][1], pt, scoring)
|
|
668
|
-
if 0: # debug
|
|
669
|
-
wrap_width = 100
|
|
670
|
-
logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
|
|
671
|
-
logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
|
|
672
|
-
logger.writeln(f"match_count: {al.match_count}")
|
|
673
|
-
s1 = gemmi.one_letter_code(q)
|
|
674
|
-
p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
|
|
675
|
-
p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
|
|
676
|
-
for k in range(0, len(p1), wrap_width):
|
|
677
|
-
logger.writeln(" seq. {}".format(p1[k:k+wrap_width]))
|
|
678
|
-
logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
|
|
679
|
-
logger.writeln(" model {}\n".format(p2[k:k+wrap_width]))
|
|
680
666
|
if al.match_count < min_nalign: continue
|
|
681
667
|
su = gemmi.calculate_superposition(pols[i][1], pols[j][1], pt, gemmi.SupSelect.All)
|
|
682
|
-
obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1])
|
|
668
|
+
obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1], pols[i][0].name, pols[j][0].name)
|
|
683
669
|
obj.calculate_local_rms(rms_loc_nlen)
|
|
684
670
|
if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
|
|
685
671
|
continue
|
|
686
672
|
ave_local_rms = numpy.nanmean(obj.local_rms)
|
|
687
673
|
if ave_local_rms > max_rms_loc: continue
|
|
688
674
|
ncslist.ncss.append(obj)
|
|
689
|
-
al_res.append({"chain_1": "{} ({}..{})".format(
|
|
690
|
-
"chain_2": "{} ({}..{})".format(
|
|
675
|
+
al_res.append({"chain_1": "{} ({}..{})".format(obj.chains[0], obj.seqids[0][0], obj.seqids[-1][0]),
|
|
676
|
+
"chain_2": "{} ({}..{})".format(obj.chains[1], obj.seqids[0][1], obj.seqids[-1][1]),
|
|
691
677
|
"aligned": al.match_count,
|
|
692
678
|
"identity": al.calculate_identity(1),
|
|
693
679
|
"rms": su.rmsd,
|
|
694
680
|
"ave(rmsloc)": ave_local_rms,
|
|
695
681
|
})
|
|
682
|
+
if al_res[-1]["identity"] < 100:
|
|
683
|
+
wrap_width = 100
|
|
684
|
+
logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
|
|
685
|
+
logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
|
|
686
|
+
logger.writeln(f"match_count: {al.match_count} (identity: {al_res[-1]['identity']:.2f})")
|
|
687
|
+
s1 = gemmi.one_letter_code(q)
|
|
688
|
+
p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
|
|
689
|
+
p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
|
|
690
|
+
for k in range(0, len(p1), wrap_width):
|
|
691
|
+
logger.writeln(" seq1 {}".format(p1[k:k+wrap_width]))
|
|
692
|
+
logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
|
|
693
|
+
logger.writeln(" seq2 {}\n".format(p2[k:k+wrap_width]))
|
|
694
|
+
|
|
696
695
|
ncslist.set_pairs()
|
|
697
696
|
df = pandas.DataFrame(al_res)
|
|
698
697
|
df.index += 1
|
|
@@ -755,7 +754,7 @@ class MetalCoordination:
|
|
|
755
754
|
logger.writeln(" (from ener_lib)")
|
|
756
755
|
else:
|
|
757
756
|
logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
|
|
758
|
-
ideals[el] = [(x["median"], x["mad"]) for x in vals if x["mad"] > 0]
|
|
757
|
+
ideals[el] = [(x["median"], max(0.02, x["mad"]*1.5)) for x in vals if x["mad"] > 0]
|
|
759
758
|
logger.writeln("")
|
|
760
759
|
for i, am in enumerate(coords[metal]):
|
|
761
760
|
logger.writeln(" site {}: {}".format(i+1, lookup[am]))
|
servalcat/utils/symmetry.py
CHANGED
|
@@ -55,7 +55,7 @@ def ncsops_from_args(args, cell, map_and_start=None, st=None, helical_min_n=None
|
|
|
55
55
|
start_xyz = numpy.zeros(3)
|
|
56
56
|
|
|
57
57
|
if args.center is None:
|
|
58
|
-
A =
|
|
58
|
+
A = cell.orth.mat.array
|
|
59
59
|
center = numpy.sum(A, axis=1) / 2 #+ start_xyz
|
|
60
60
|
logger.writeln("Center: {}".format(center))
|
|
61
61
|
else:
|
|
@@ -156,7 +156,7 @@ def show_operators_axis_angle(ops):
|
|
|
156
156
|
def show_ncs_operators_axis_angle(ops):
|
|
157
157
|
# ops: List of gemmi.NcsOp
|
|
158
158
|
for i, op in enumerate(ops):
|
|
159
|
-
op2 =
|
|
159
|
+
op2 = op.tr.mat.array
|
|
160
160
|
ax, ang = generate_operators.Rotation2AxisAngle_general(op2)
|
|
161
161
|
axlab = "[{: .4f}, {: .4f}, {: .4f}]".format(*ax)
|
|
162
162
|
trlab = "[{: 9.4f}, {: 9.4f}, {: 9.4f}]".format(*op.tr.vec.tolist())
|
|
@@ -210,7 +210,7 @@ def generate_helical_operators(start_xyz, center, axsym, deltaphi, deltaz, axis1
|
|
|
210
210
|
|
|
211
211
|
def make_NcsOps_from_matrices(matrices, cell=None, center=None):
|
|
212
212
|
if center is None:
|
|
213
|
-
A =
|
|
213
|
+
A = cell.orth.mat.array
|
|
214
214
|
center = numpy.sum(A,axis=1) / 2
|
|
215
215
|
|
|
216
216
|
center = gemmi.Vec3(*center)
|
|
@@ -225,9 +225,9 @@ def make_NcsOps_from_matrices(matrices, cell=None, center=None):
|
|
|
225
225
|
# make_NcsOps_from_matrices()
|
|
226
226
|
|
|
227
227
|
def find_center_of_origin(mat, vec): # may not be unique.
|
|
228
|
-
tmp = numpy.identity(3) - numpy.array(mat)
|
|
228
|
+
tmp = numpy.identity(3) - numpy.array(mat.array)
|
|
229
229
|
ret = numpy.dot(numpy.linalg.pinv(tmp), vec.tolist())
|
|
230
|
-
resid = vec.tolist() - (numpy.dot(mat, -ret) + ret)
|
|
230
|
+
resid = vec.tolist() - (numpy.dot(mat.array, -ret) + ret)
|
|
231
231
|
return gemmi.Vec3(*ret), gemmi.Vec3(*resid)
|
|
232
232
|
# find_center_of_origin()
|
|
233
233
|
|
servalcat/xtal/french_wilson.py
CHANGED
|
@@ -101,14 +101,14 @@ def determine_Sigma_and_aniso(hkldata):
|
|
|
101
101
|
S = hkldata.binned_df.loc[i_bin, "S"]
|
|
102
102
|
f0 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
|
|
103
103
|
S * hkldata.df.epsilon.to_numpy()[idxes],
|
|
104
|
-
|
|
104
|
+
numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
|
|
105
105
|
shift = numpy.exp(ll_shift_bin_S(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
|
|
106
106
|
S, hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes]))
|
|
107
107
|
for k in range(3):
|
|
108
108
|
ss = shift**(1. / 2**k)
|
|
109
109
|
f1 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
|
|
110
110
|
S * ss * hkldata.df.epsilon.to_numpy()[idxes],
|
|
111
|
-
|
|
111
|
+
numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
|
|
112
112
|
#logger.writeln("bin {:3d} f0 = {:.3e} shift = {:.3e} df = {:.3e}".format(i_bin, f0, ss, f1 - f0))
|
|
113
113
|
if f1 < f0:
|
|
114
114
|
hkldata.binned_df.loc[i_bin, "S"] = S * ss
|
|
@@ -145,7 +145,7 @@ def ll_all_B(x, ssqmat, hkldata, adpdirs):
|
|
|
145
145
|
for i_bin, idxes in hkldata.binned():
|
|
146
146
|
ret += numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
|
|
147
147
|
hkldata.binned_df.S[i_bin] * hkldata.df.epsilon.to_numpy()[idxes],
|
|
148
|
-
|
|
148
|
+
numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
|
|
149
149
|
return ret
|
|
150
150
|
|
|
151
151
|
def ll_shift_bin_S(Io, sigIo, k_ani, S, c, eps, exp_trans=True):
|
|
@@ -176,10 +176,12 @@ def ll_shift_B(x, ssqmat, hkldata, adpdirs):
|
|
|
176
176
|
|
|
177
177
|
def expected_F_from_int(Io, sigo, k_ani, eps, c, S):
|
|
178
178
|
to = Io / sigo - sigo / c / k_ani**2 / S / eps
|
|
179
|
+
tf = numpy.zeros(Io.size)
|
|
180
|
+
sig1 = numpy.ones(Io.size)
|
|
179
181
|
k_num = numpy.where(c == 1, 0.5, 0.)
|
|
180
|
-
F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to,
|
|
182
|
+
F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, tf, sig1, c,
|
|
181
183
|
integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
|
|
182
|
-
Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to,
|
|
184
|
+
Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, tf, sig1, c,
|
|
183
185
|
integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
|
|
184
186
|
varF = Fsq - F**2
|
|
185
187
|
return F, numpy.sqrt(varF)
|