servalcat-0.4.88-cp310-cp310-win_amd64.whl → servalcat-0.4.100-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

servalcat/utils/hkl.py CHANGED
@@ -7,7 +7,6 @@ Mozilla Public License, version 2.0; see LICENSE.
7
7
  """
8
8
  from __future__ import absolute_import, division, print_function, generators
9
9
  import numpy
10
- import numpy.lib.recfunctions
11
10
  import scipy.optimize
12
11
  import pandas
13
12
  import gemmi
@@ -17,27 +16,28 @@ dtypes64 = dict(i=numpy.int64, u=numpy.uint64, f=numpy.float64, c=numpy.complex1
17
16
  to64 = lambda x: x.astype(dtypes64.get(x.dtype.kind, x.dtype))
18
17
 
19
18
  def r_factor(fo, fc):
20
- if fo.size == 0:
19
+ denom = numpy.nansum(fo)
20
+ if denom == 0:
21
21
  return numpy.nan
22
- return numpy.nansum(numpy.abs(fo-fc)) / numpy.nansum(fo)
22
+ return numpy.nansum(numpy.abs(fo-fc)) / denom
23
23
  def correlation(obs, calc):
24
- if obs.size == 0:
25
- return numpy.nan
26
24
  sel = numpy.isfinite(obs)
25
+ if obs.size == 0 or numpy.all(~sel):
26
+ return numpy.nan
27
27
  return numpy.corrcoef(obs[sel], calc[sel])[0,1]
28
28
 
29
29
  def df_from_asu_data(asu_data, label):
30
- df = pandas.DataFrame(data=asu_data.miller_array,
30
+ df = pandas.DataFrame(data=asu_data.miller_array.astype(numpy.int32),
31
31
  columns=["H","K","L"])
32
- if asu_data.value_array.dtype.names == ('value', 'sigma'):
33
- df[label] = to64(asu_data.value_array["value"])
34
- df["SIG"+label] = to64(asu_data.value_array["sigma"])
32
+ if type(asu_data) is gemmi.ValueSigmaAsuData:
33
+ df[label] = to64(asu_data.value_array[:,0])
34
+ df["SIG"+label] = to64(asu_data.value_array[:,1])
35
35
  else:
36
36
  df[label] = to64(asu_data.value_array)
37
37
  return df
38
38
 
39
39
  def df_from_raw(miller_array, value_array, label):
40
- df = pandas.DataFrame(data=miller_array,
40
+ df = pandas.DataFrame(data=miller_array.astype(numpy.int32),
41
41
  columns=["H","K","L"])
42
42
  df[label] = to64(value_array)
43
43
  return df
@@ -93,7 +93,7 @@ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
93
93
  if mismatches:
94
94
  raise RuntimeError("MTZ column types mismatch: {}".format(" ".join(mismatches)))
95
95
 
96
- df = pandas.DataFrame(data=numpy.array(mtz, copy=False), columns=mtz.column_labels())
96
+ df = pandas.DataFrame(data=mtz.array, columns=mtz.column_labels())
97
97
  df = df.astype({col: 'int32' for col in col_types if col_types[col] == "H"})
98
98
  df = df.astype({col: 'Int64' for col in col_types if col_types[col] in ("B", "Y", "I")}) # pandas's nullable int
99
99
  for lab in set(mtz.column_labels()).difference(labels+["H","K","L"]):
@@ -176,7 +176,7 @@ def mtz_selected(mtz, columns):
176
176
  dataset_id=col_dict[col].dataset_id, expand_data=False)
177
177
 
178
178
  idxes = [col_idxes[col] for col in columns]
179
- data = numpy.array(mtz, copy=False)[:, idxes]
179
+ data = mtz.array[:, idxes]
180
180
  mtz2.set_data(data)
181
181
  return mtz2
182
182
  # mtz_selected()
@@ -199,6 +199,8 @@ def decide_n_bins(n_per_bin, s_array, power=2, min_bins=1, max_bins=50):
199
199
  def fft_map(cell, sg, miller_array, data, grid_size=None, sample_rate=3):
200
200
  if data is not None:
201
201
  data = data.astype(numpy.complex64) # we may want to keep complex128?
202
+ if type(data) is pandas.core.series.Series:
203
+ data = data.to_numpy()
202
204
  asu = gemmi.ComplexAsuData(cell, sg, miller_array, data)
203
205
  if grid_size is None:
204
206
  ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
@@ -223,7 +225,7 @@ class HklData:
223
225
  def switch_to_asu(self):
224
226
  # Need to care phases
225
227
  assert not any(numpy.iscomplexobj(self.df[x]) for x in self.df)
226
- hkl = self.miller_array().to_numpy()
228
+ hkl = self.miller_array()
227
229
  self.sg.switch_to_asu(hkl)
228
230
  self.df[["H","K","L"]] = hkl
229
231
  # in some environment type changes to int64 even though hkl's dtype is int32
@@ -265,11 +267,11 @@ class HklData:
265
267
  # merge_asu_data()
266
268
 
267
269
  def miller_array(self):
268
- return self.df[["H","K","L"]]
270
+ return self.df[["H","K","L"]].to_numpy()
269
271
 
270
272
  def s_array(self):
271
273
  hkl = self.miller_array()
272
- return numpy.dot(hkl, self.cell.fractionalization_matrix)
274
+ return numpy.dot(hkl, self.cell.frac.mat.array)
273
275
 
274
276
  def ssq_mat(self):
275
277
  # k_aniso = exp(-s^T B_aniso s / 4)
@@ -288,8 +290,8 @@ class HklData:
288
290
  s2 = 1 / self.d_spacings()**2
289
291
  return numpy.exp(-b_iso / 4 * s2)
290
292
  if b_cart is not None:
291
- b_star = b_cart.transformed_by(self.cell.fractionalization_matrix)
292
- return numpy.exp(-b_star.r_u_r(self.miller_array().to_numpy()) / 4)
293
+ b_star = b_cart.transformed_by(self.cell.frac.mat)
294
+ return numpy.exp(-b_star.r_u_r(self.miller_array()) / 4)
293
295
 
294
296
  def calc_d(self):
295
297
  self.df["d"] = self.cell.calculate_d_array(self.miller_array())
@@ -314,8 +316,10 @@ class HklData:
314
316
  self.df.sort_values("d", ascending=ascending, inplace=True)
315
317
  # sort_by_resolution()
316
318
 
317
- def d_min_max(self):
319
+ def d_min_max(self, labs=None):
318
320
  d = self.d_spacings()
321
+ if labs:
322
+ d = d[~self.df[labs].isna().any(axis=1)]
319
323
  return numpy.min(d), numpy.max(d)
320
324
  # d_min_max()
321
325
 
@@ -489,6 +493,31 @@ class HklData:
489
493
  self.df = self.df[~sel]
490
494
  # remove_nonpositive()
491
495
 
496
+ def mask_invalid_obs_values(self, labels):
497
+ assert 1 < len(labels) < 6
498
+ assert labels[1].startswith("SIG")
499
+ def do_mask(label, target_labels):
500
+ sel = self.df[label] <= 0
501
+ n_bad = sel.sum()
502
+ if n_bad > 0:
503
+ logger.writeln("Removing {} reflections with {}<=0".format(n_bad, label))
504
+ self.df.loc[sel, target_labels] = numpy.nan
505
+ # If any element within target_labels is non-finite, mask all elements
506
+ self.df.loc[(~numpy.isfinite(self.df[target_labels])).any(axis=1), target_labels] = numpy.nan
507
+
508
+ if len(labels) < 4: # F/SIGF or I/SIGI
509
+ if labels[0].startswith("F"):
510
+ do_mask(labels[0], labels[:2]) # bad F
511
+ do_mask(labels[1], labels[:2]) # bad sigma
512
+ else: # I(+)/SIGI(+)/I(-)/SIGI(-) or F...
513
+ assert labels[3].startswith("SIG")
514
+ if labels[0].startswith("F"):
515
+ do_mask(labels[0], labels[:2]) # bad F+
516
+ do_mask(labels[2], labels[2:4]) # bad F-
517
+ do_mask(labels[1], labels[:2]) # bad sigma+
518
+ do_mask(labels[3], labels[2:4]) # bad sigma-
519
+ # mask_invalid_obs_values()
520
+
492
521
  def remove_systematic_absences(self):
493
522
  is_absent = self.sg.operations().systematic_absences(self.miller_array())
494
523
  n_absent = numpy.sum(is_absent)
@@ -497,12 +526,22 @@ class HklData:
497
526
  self.df = self.df[~is_absent]
498
527
  # remove_systematic_absences()
499
528
 
500
- def merge_anomalous(self, labs, newlabs):
529
+ def merge_anomalous(self, labs, newlabs, method="weighted"):
530
+ assert method in ("weighted", "simple")
501
531
  assert len(labs) == 4 # i+,sigi+,i-,sigi- for example
502
532
  assert len(newlabs) == 2
503
- # skipna=True is default, so missing value is handled nicely.
504
- self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
505
- self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
533
+ if method == "simple":
534
+ # skipna=True is default, so missing value is handled nicely.
535
+ self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
536
+ self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
537
+ else:
538
+ obs = self.df[[labs[0], labs[2]]].to_numpy()
539
+ weights = 1. / self.df[[labs[1], labs[3]]].to_numpy()**2
540
+ sum_w = numpy.nansum(weights, axis=1)
541
+ sum_w[sum_w == 0] = numpy.nan # mask when both are nan
542
+ self.df[newlabs[0]] = numpy.nansum(obs * weights, axis=1) / sum_w
543
+ self.df[newlabs[1]] = numpy.sqrt(1. / sum_w)
544
+ # merge_anomalous()
506
545
 
507
546
  def as_asu_data(self, label=None, data=None, label_sigma=None):
508
547
  if label is None: assert data is not None
@@ -511,9 +550,7 @@ class HklData:
511
550
  if label_sigma is not None:
512
551
  assert data is None
513
552
  assert not numpy.iscomplexobj(self.df[label])
514
- sigma = self.df[label_sigma]
515
- data = numpy.lib.recfunctions.unstructured_to_structured(self.df[[label,label_sigma]].to_numpy(),
516
- numpy.dtype([("value", numpy.float32), ("sigma", numpy.float32)]))
553
+ data = self.df[[label,label_sigma]].to_numpy()
517
554
  elif data is None:
518
555
  data = self.df[label]
519
556
 
@@ -532,7 +569,7 @@ class HklData:
532
569
 
533
570
  def fft_map(self, label=None, data=None, grid_size=None, sample_rate=3):
534
571
  if data is None:
535
- data = self.df[label]
572
+ data = self.df[label].to_numpy()
536
573
  return fft_map(self.cell, self.sg, self.miller_array(), data, grid_size, sample_rate)
537
574
  # fft_map()
538
575
 
servalcat/utils/logger.py CHANGED
@@ -18,12 +18,15 @@ class Logger(object):
18
18
  def __init__(self, file_out=None, append=True):
19
19
  self.ofs = None
20
20
  self.stopped = False
21
+ self.prefix = ""
21
22
  if file_out:
22
23
  self.set_file(file_out, append)
23
24
  # __init__()
24
25
  def stop_logging(self): self.stopped = True
25
26
  def start_logging(self): self.stopped = False
26
-
27
+ def set_prefix(self, p): self.prefix = p
28
+ def clear_prefix(self): self.prefix = ""
29
+
27
30
  def set_file(self, file_out, append=True):
28
31
  try:
29
32
  self.ofs = open(file_out, "a" if append else "w")
@@ -33,6 +36,8 @@ class Logger(object):
33
36
 
34
37
  def write(self, l, end="", flush=True, fs=None, print_fs=sys.stdout):
35
38
  if self.stopped: return
39
+ if self.prefix:
40
+ l = "".join(self.prefix + x for x in l.splitlines(keepends=True))
36
41
  print(l, end=end, file=print_fs, flush=flush)
37
42
  for f in (self.ofs, fs):
38
43
  if f is not None:
@@ -69,6 +74,25 @@ close = _logger.close
69
74
  flush = _logger.flush
70
75
  stop = _logger.stop_logging
71
76
  start = _logger.start_logging
77
+ set_prefix = _logger.set_prefix
78
+ clear_prefix = _logger.clear_prefix
79
+
80
+ def with_prefix(prefix):
81
+ class WithPrefix(object): # should keep original prefix and restore?
82
+ def __enter__(self):
83
+ _logger.set_prefix(prefix)
84
+ return _logger
85
+ def __exit__(self, exc_type, exc_val, exc_tb):
86
+ _logger.clear_prefix()
87
+ return WithPrefix()
88
+
89
+ def silent():
90
+ class Silent(object):
91
+ def write(self, *args, **kwargs):
92
+ pass
93
+ def flush(self):
94
+ pass
95
+ return Silent()
72
96
 
73
97
  def dependency_versions():
74
98
  import gemmi
servalcat/utils/maps.py CHANGED
@@ -268,9 +268,9 @@ def optimize_peak(grid, ini_pos):
268
268
  logger.writeln("Finding peak using interpolation..")
269
269
  x = grid.unit_cell.fractionalize(ini_pos)
270
270
  logger.writeln(" x0: [{}, {}, {}]".format(*x.tolist()))
271
- logger.writeln(" f0: {}".format(-grid.tricubic_interpolation(x)))
271
+ logger.writeln(" f0: {}".format(-grid.interpolate_value(x, order=3)))
272
272
 
273
- res = scipy.optimize.minimize(fun=lambda x:-grid.tricubic_interpolation(gemmi.Fractional(*x)),
273
+ res = scipy.optimize.minimize(fun=lambda x:-grid.interpolate_value(gemmi.Fractional(*x), order=3),
274
274
  x0=x.tolist(),
275
275
  jac=lambda x:-numpy.array(grid.tricubic_interpolation_der(gemmi.Fractional(*x))[1:])
276
276
  )
servalcat/utils/model.py CHANGED
@@ -73,11 +73,12 @@ def remove_charge(sts):
73
73
  def check_atomsf(sts, source, mott_bethe=True):
74
74
  assert source in ("xray", "electron", "neutron")
75
75
  if source != "electron": mott_bethe = False
76
- logger.writeln("Atomic scattering factors for {}".format("electron (Mott-Bethe)" if mott_bethe else source))
76
+ logger.writeln("Atomic scattering factors for {}".format("xray (use Mott-Bethe to convert to electrons)" if mott_bethe else source))
77
77
  if source != "xray" and not mott_bethe:
78
78
  logger.writeln(" Note that charges will be ignored")
79
79
  el_charges = {(cra.atom.element, cra.atom.charge) for st in sts for cra in st[0].all()}
80
80
  elems = {x[0] for x in el_charges}
81
+ tmp = {}
81
82
  if source == "xray" or mott_bethe:
82
83
  shown = set()
83
84
  for el, charge in sorted(el_charges, key=lambda x: (x[0].atomic_number, x[1])):
@@ -88,12 +89,16 @@ def check_atomsf(sts, source, mott_bethe=True):
88
89
  charge = 0
89
90
  if (el, charge) in shown: continue
90
91
  label = el.name if charge == 0 else "{}{:+}".format(el.name, charge)
91
- logger.writeln(" {} {}".format(label, tuple(sf.get_coefs())))
92
92
  shown.add((el, charge))
93
+ tmp[label] = {**{f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(sf, k))}, "c": sf.c}
93
94
  else:
94
95
  for el in sorted(elems, key=lambda x: x.atomic_number):
95
- sf = el.c4322 if source == "electron" else el.neutron92
96
- logger.writeln(" {} {}".format(el.name, tuple(sf.get_coefs())))
96
+ if source == "electron":
97
+ tmp[el.name] = {f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(el.c4322, k))}
98
+ else:
99
+ tmp[el.name] = {"a": el.neutron92.get_coefs()[0]}
100
+ with logger.with_prefix(" "):
101
+ logger.writeln(pandas.DataFrame(tmp).T.to_string())
97
102
  logger.writeln("")
98
103
  # check_atomsf()
99
104
 
@@ -347,8 +352,8 @@ def translate_into_box(st, origin=None, apply_shift=True):
347
352
  if origin is None: origin = gemmi.Position(0,0,0)
348
353
 
349
354
  # apply unit cell translations to put model into a box (unit cell)
350
- omat = numpy.array(st.cell.orthogonalization_matrix)
351
- fmat = numpy.array(st.cell.fractionalization_matrix).transpose()
355
+ omat = st.cell.orth.mat.array
356
+ fmat = st.cell.frac.mat.array.transpose()
352
357
  com = numpy.array((st[0].calculate_center_of_mass() - origin).tolist())
353
358
  shift = sum([omat[:,i]*numpy.floor(1-numpy.dot(com, fmat[:,i])) for i in range(3)])
354
359
  tr = gemmi.Transform(gemmi.Mat33(), gemmi.Vec3(*shift))
@@ -443,7 +448,7 @@ def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=
443
448
  logger.writeln(" correcting aniso= {}".format(tostr(atom.aniso.elements_pdb())))
444
449
  logger.writeln(" aniso_viol= {}".format(tostr(diff)))
445
450
 
446
- mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat for i in images]
451
+ mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat.array for i in images]
447
452
  mat_total = (numpy.identity(3) + sum(numpy.array(m) for m in mats)) / n_images
448
453
  mat_total_aniso = (numpy.identity(6) + sum(mat33_as66(m.tolist()) for m in mats)) / n_images
449
454
  mat_total_aniso = numpy.linalg.pinv(mat_total_aniso)
@@ -553,7 +558,9 @@ def reset_adp(model, bfactor=None, adp_mode="iso"):
553
558
  if adp_mode == "iso" or (adp_mode == "fix" and bfactor is not None):
554
559
  cra.atom.aniso = gemmi.SMat33f(0,0,0,0,0,0)
555
560
  elif adp_mode == "aniso":
556
- if not cra.atom.aniso.nonzero() or bfactor is not None:
561
+ if cra.atom.aniso.nonzero() and bfactor is None: # just in case
562
+ cra.atom.b_iso = numpy.mean(cra.atom.aniso.calculate_eigenvalues()) * u_to_b
563
+ else:
557
564
  u = cra.atom.b_iso * b_to_u
558
565
  cra.atom.aniso = gemmi.SMat33f(u, u, u, 0, 0, 0)
559
566
  # reset_adp()
@@ -630,7 +637,7 @@ def to_dataframe(st):
630
637
  for cra in m.all():
631
638
  c,r,a = cra.chain, cra.residue, cra.atom
632
639
  # TODO need support r.het_flag, r.flag, a.calc_flag, a.flag, a.serial?
633
- app("model", m.name)
640
+ app("model", m.num)
634
641
  app("chain", c.name)
635
642
  app("resn", r.name)
636
643
  app("subchain", r.subchain)
@@ -665,8 +672,8 @@ def from_dataframe(df, st=None): # Slow!
665
672
  for i in range(len(st)):
666
673
  del st[0]
667
674
 
668
- for m_name, dm in df.groupby("model"):
669
- st.add_model(gemmi.Model(m_name))
675
+ for m_num, dm in df.groupby("model"):
676
+ st.add_model(gemmi.Model(m_num))
670
677
  m = st[-1]
671
678
  for c_name, dc in dm.groupby("chain"):
672
679
  m.add_chain(gemmi.Chain(c_name))
@@ -704,7 +711,7 @@ def from_dataframe(df, st=None): # Slow!
704
711
 
705
712
  def st_from_positions(positions, bs=None, qs=None):
706
713
  st = gemmi.Structure()
707
- st.add_model(gemmi.Model("1"))
714
+ st.add_model(gemmi.Model(1))
708
715
  st[0].add_chain(gemmi.Chain("A"))
709
716
  c = st[0][0]
710
717
  if bs is None: bs = (0. for _ in range(len(positions)))
@@ -727,7 +734,7 @@ def st_from_positions(positions, bs=None, qs=None):
727
734
 
728
735
  def invert_model(st):
729
736
  # invert x-axis
730
- A = numpy.array(st.cell.orthogonalization_matrix.tolist())
737
+ A = st.cell.orth.mat.array
731
738
  center = numpy.sum(A,axis=1) / 2
732
739
  center = gemmi.Vec3(*center)
733
740
  mat = gemmi.Mat33([[-1,0,0],[0,1,0],[0,0,1]])
@@ -742,14 +749,14 @@ def cx_to_mx(ss): #SmallStructure to Structure
742
749
  st = gemmi.Structure()
743
750
  st.spacegroup_hm = ss.spacegroup.xhm()
744
751
  st.cell = ss.cell
745
- st.add_model(gemmi.Model("1"))
752
+ st.add_model(gemmi.Model(1))
746
753
  st[-1].add_chain(gemmi.Chain("A"))
747
754
  st[-1][-1].add_residue(gemmi.Residue())
748
755
  st[-1][-1][-1].seqid.num = 1
749
756
  st[-1][-1][-1].name = "00"
750
757
 
751
758
  ruc = ss.cell.reciprocal()
752
- cif2cart = ss.cell.orthogonalization_matrix.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))
759
+ cif2cart = ss.cell.orth.mat.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))
753
760
  as_smat33f = lambda x: gemmi.SMat33f(x.u11, x.u22, x.u33, x.u12, x.u13, x.u23)
754
761
 
755
762
  for site in ss.sites:
servalcat/utils/refmac.py CHANGED
@@ -22,7 +22,7 @@ from servalcat.utils import fileio
22
22
 
23
23
  re_version = re.compile("#.* Refmac *version ([^ ]+) ")
24
24
  re_error = re.compile('(warn|error *[:]|error *==|^error)', re.IGNORECASE)
25
- re_outlier_start = re.compile("\*\*\*\*.*outliers")
25
+ re_outlier_start = re.compile(r"\*\*\*\*.*outliers")
26
26
 
27
27
  def check_version(exe="refmac5"):
28
28
  ver = ()
@@ -10,7 +10,6 @@ from servalcat.utils import logger
10
10
  from servalcat.refmac import refmac_keywords
11
11
  from servalcat import ext
12
12
  import os
13
- import io
14
13
  import gemmi
15
14
  import string
16
15
  import random
@@ -87,14 +86,13 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
87
86
  if cif_files is None:
88
87
  cif_files = []
89
88
 
89
+ monlib = gemmi.MonLib()
90
90
  if monomer_dir and not ignore_monomer_dir:
91
91
  if not os.path.isdir(monomer_dir):
92
92
  raise RuntimeError("not a directory: {}".format(monomer_dir))
93
93
 
94
94
  logger.writeln("Reading monomers from {}".format(monomer_dir))
95
- monlib = gemmi.read_monomer_lib(monomer_dir, resnames, ignore_missing=True)
96
- else:
97
- monlib = gemmi.MonLib()
95
+ monlib.read_monomer_lib(monomer_dir, resnames, logger)
98
96
 
99
97
  for f in cif_files:
100
98
  logger.writeln("Reading monomer: {}".format(f))
@@ -109,8 +107,8 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
109
107
 
110
108
  # Check if bond length values are included
111
109
  # This is to fail if cif file is e.g. from PDB website
112
- if len(atom_id_list) > 1 and not b.find_values("_chem_comp_bond.value_dist"):
113
- raise RuntimeError("{} does not contain bond length value for {}. You need to generate restraints (e.g. using acedrg).".format(f, name))
110
+ if b.find_values("_chem_comp_bond.comp_id") and not b.find_values("_chem_comp_bond.value_dist"):
111
+ raise RuntimeError(f"Bond length information for {name} is missing from {f}. Please generate restraints using a tool like acedrg.")
114
112
 
115
113
  for row in b.find("_chem_link.", ["id"]):
116
114
  link_id = row.str(0)
@@ -148,7 +146,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
148
146
  logger.writeln(" it is strongly recommended to generate them using AceDRG.")
149
147
 
150
148
  if update_old_atom_names:
151
- logger.write(monlib.update_old_atom_names(st))
149
+ monlib.update_old_atom_names(st, logger)
152
150
 
153
151
  if params:
154
152
  update_torsions(monlib, params.get("restr", {}).get("torsion_include", {}))
@@ -158,6 +156,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
158
156
 
159
157
  def fix_elements_in_model(monlib, st):
160
158
  monlib_els = {m: {a.id: a.el for a in monlib.monomers[m].atoms} for m in monlib.monomers}
159
+ lookup = {x.atom: x for x in st[0].all()}
161
160
  for chain in st[0]:
162
161
  for res in chain:
163
162
  d = monlib_els.get(res.name)
@@ -167,7 +166,7 @@ def fix_elements_in_model(monlib, st):
167
166
  continue
168
167
  el = d[at.name]
169
168
  if at.element != el:
170
- logger.writeln(f"WARNING: correcting element of {st[0].get_cra(at)} to {el.name}")
169
+ logger.writeln(f"WARNING: correcting element of {lookup[at]} to {el.name}")
171
170
  at.element = el
172
171
  # correct_elements_in_model()
173
172
 
@@ -334,10 +333,9 @@ def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_err
334
333
  keywords = []
335
334
  # these checks can be done after sorting links
336
335
  logger.writeln("Creating restraints..")
337
- sio = io.StringIO()
338
- topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=sio, reorder=False,
339
- ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
340
- for l in sio.getvalue().splitlines(): logger.writeln(" " + l)
336
+ with logger.with_prefix(" "):
337
+ topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=logger, reorder=False,
338
+ ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
341
339
  unknown_cc = set()
342
340
  link_related = set()
343
341
  nan_hydr = set()
@@ -665,34 +663,35 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
665
663
  q = [x.name for x in pols[i][1]]
666
664
  for j in range(i+1, len(pols)):
667
665
  al = gemmi.align_sequence_to_polymer(q, pols[j][1], pt, scoring)
668
- if 0: # debug
669
- wrap_width = 100
670
- logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
671
- logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
672
- logger.writeln(f"match_count: {al.match_count}")
673
- s1 = gemmi.one_letter_code(q)
674
- p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
675
- p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
676
- for k in range(0, len(p1), wrap_width):
677
- logger.writeln(" seq. {}".format(p1[k:k+wrap_width]))
678
- logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
679
- logger.writeln(" model {}\n".format(p2[k:k+wrap_width]))
680
666
  if al.match_count < min_nalign: continue
681
667
  su = gemmi.calculate_superposition(pols[i][1], pols[j][1], pt, gemmi.SupSelect.All)
682
- obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1])
668
+ obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1], pols[i][0].name, pols[j][0].name)
683
669
  obj.calculate_local_rms(rms_loc_nlen)
684
670
  if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
685
671
  continue
686
672
  ave_local_rms = numpy.nanmean(obj.local_rms)
687
673
  if ave_local_rms > max_rms_loc: continue
688
674
  ncslist.ncss.append(obj)
689
- al_res.append({"chain_1": "{} ({}..{})".format(pols[i][0].name, pols[i][1][0].seqid, pols[i][1][-1].seqid),
690
- "chain_2": "{} ({}..{})".format(pols[j][0].name, pols[j][1][0].seqid, pols[j][1][-1].seqid),
675
+ al_res.append({"chain_1": "{} ({}..{})".format(obj.chains[0], obj.seqids[0][0], obj.seqids[-1][0]),
676
+ "chain_2": "{} ({}..{})".format(obj.chains[1], obj.seqids[0][1], obj.seqids[-1][1]),
691
677
  "aligned": al.match_count,
692
678
  "identity": al.calculate_identity(1),
693
679
  "rms": su.rmsd,
694
680
  "ave(rmsloc)": ave_local_rms,
695
681
  })
682
+ if al_res[-1]["identity"] < 100:
683
+ wrap_width = 100
684
+ logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
685
+ logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
686
+ logger.writeln(f"match_count: {al.match_count} (identity: {al_res[-1]['identity']:.2f})")
687
+ s1 = gemmi.one_letter_code(q)
688
+ p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
689
+ p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
690
+ for k in range(0, len(p1), wrap_width):
691
+ logger.writeln(" seq1 {}".format(p1[k:k+wrap_width]))
692
+ logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
693
+ logger.writeln(" seq2 {}\n".format(p2[k:k+wrap_width]))
694
+
696
695
  ncslist.set_pairs()
697
696
  df = pandas.DataFrame(al_res)
698
697
  df.index += 1
@@ -755,7 +754,7 @@ class MetalCoordination:
755
754
  logger.writeln(" (from ener_lib)")
756
755
  else:
757
756
  logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
758
- ideals[el] = [(x["median"], x["mad"]) for x in vals if x["mad"] > 0]
757
+ ideals[el] = [(x["median"], max(0.02, x["mad"]*1.5)) for x in vals if x["mad"] > 0]
759
758
  logger.writeln("")
760
759
  for i, am in enumerate(coords[metal]):
761
760
  logger.writeln(" site {}: {}".format(i+1, lookup[am]))
@@ -55,7 +55,7 @@ def ncsops_from_args(args, cell, map_and_start=None, st=None, helical_min_n=None
55
55
  start_xyz = numpy.zeros(3)
56
56
 
57
57
  if args.center is None:
58
- A = numpy.array(cell.orthogonalization_matrix.tolist())
58
+ A = cell.orth.mat.array
59
59
  center = numpy.sum(A, axis=1) / 2 #+ start_xyz
60
60
  logger.writeln("Center: {}".format(center))
61
61
  else:
@@ -156,7 +156,7 @@ def show_operators_axis_angle(ops):
156
156
  def show_ncs_operators_axis_angle(ops):
157
157
  # ops: List of gemmi.NcsOp
158
158
  for i, op in enumerate(ops):
159
- op2 = numpy.array(op.tr.mat.tolist())
159
+ op2 = op.tr.mat.array
160
160
  ax, ang = generate_operators.Rotation2AxisAngle_general(op2)
161
161
  axlab = "[{: .4f}, {: .4f}, {: .4f}]".format(*ax)
162
162
  trlab = "[{: 9.4f}, {: 9.4f}, {: 9.4f}]".format(*op.tr.vec.tolist())
@@ -210,7 +210,7 @@ def generate_helical_operators(start_xyz, center, axsym, deltaphi, deltaz, axis1
210
210
 
211
211
  def make_NcsOps_from_matrices(matrices, cell=None, center=None):
212
212
  if center is None:
213
- A = numpy.array(cell.orthogonalization_matrix.tolist())
213
+ A = cell.orth.mat.array
214
214
  center = numpy.sum(A,axis=1) / 2
215
215
 
216
216
  center = gemmi.Vec3(*center)
@@ -225,9 +225,9 @@ def make_NcsOps_from_matrices(matrices, cell=None, center=None):
225
225
  # make_NcsOps_from_matrices()
226
226
 
227
227
  def find_center_of_origin(mat, vec): # may not be unique.
228
- tmp = numpy.identity(3) - numpy.array(mat)
228
+ tmp = numpy.identity(3) - numpy.array(mat.array)
229
229
  ret = numpy.dot(numpy.linalg.pinv(tmp), vec.tolist())
230
- resid = vec.tolist() - (numpy.dot(mat, -ret) + ret)
230
+ resid = vec.tolist() - (numpy.dot(mat.array, -ret) + ret)
231
231
  return gemmi.Vec3(*ret), gemmi.Vec3(*resid)
232
232
  # find_center_of_origin()
233
233
 
@@ -101,14 +101,14 @@ def determine_Sigma_and_aniso(hkldata):
101
101
  S = hkldata.binned_df.loc[i_bin, "S"]
102
102
  f0 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
103
103
  S * hkldata.df.epsilon.to_numpy()[idxes],
104
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
104
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
105
105
  shift = numpy.exp(ll_shift_bin_S(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
106
106
  S, hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes]))
107
107
  for k in range(3):
108
108
  ss = shift**(1. / 2**k)
109
109
  f1 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
110
110
  S * ss * hkldata.df.epsilon.to_numpy()[idxes],
111
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
111
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
112
112
  #logger.writeln("bin {:3d} f0 = {:.3e} shift = {:.3e} df = {:.3e}".format(i_bin, f0, ss, f1 - f0))
113
113
  if f1 < f0:
114
114
  hkldata.binned_df.loc[i_bin, "S"] = S * ss
@@ -145,7 +145,7 @@ def ll_all_B(x, ssqmat, hkldata, adpdirs):
145
145
  for i_bin, idxes in hkldata.binned():
146
146
  ret += numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
147
147
  hkldata.binned_df.S[i_bin] * hkldata.df.epsilon.to_numpy()[idxes],
148
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
148
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
149
149
  return ret
150
150
 
151
151
  def ll_shift_bin_S(Io, sigIo, k_ani, S, c, eps, exp_trans=True):
@@ -176,10 +176,12 @@ def ll_shift_B(x, ssqmat, hkldata, adpdirs):
176
176
 
177
177
  def expected_F_from_int(Io, sigo, k_ani, eps, c, S):
178
178
  to = Io / sigo - sigo / c / k_ani**2 / S / eps
179
+ tf = numpy.zeros(Io.size)
180
+ sig1 = numpy.ones(Io.size)
179
181
  k_num = numpy.where(c == 1, 0.5, 0.)
180
- F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, 0., 1., c,
182
+ F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, tf, sig1, c,
181
183
  integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
182
- Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, 0., 1., c,
184
+ Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, tf, sig1, c,
183
185
  integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
184
186
  varF = Fsq - F**2
185
187
  return F, numpy.sqrt(varF)