PyPI - servalcat - Versions diffs - 0.4.72__cp312-cp312-macosx_11_0_arm64.whl → 0.4.99__cp312-cp312-macosx_11_0_arm64.whl - Mend

servalcat 0.4.72__cp312-cp312-macosx_11_0_arm64.whl → 0.4.99__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of servalcat might be problematic. Click here for more details.

Files changed (33)

servalcat/__init__.py +2 -2
servalcat/ext.cpython-312-darwin.so +0 -0
servalcat/refine/refine.py +152 -67
servalcat/refine/refine_geom.py +32 -13
servalcat/refine/refine_spa.py +70 -40
servalcat/refine/refine_xtal.py +45 -13
servalcat/refine/spa.py +15 -4
servalcat/refine/xtal.py +147 -98
servalcat/refmac/exte.py +7 -5
servalcat/refmac/refmac_keywords.py +11 -9
servalcat/refmac/refmac_wrapper.py +87 -60
servalcat/spa/fofc.py +20 -3
servalcat/spa/fsc.py +11 -11
servalcat/spa/run_refmac.py +27 -12
servalcat/spa/translate.py +2 -2
servalcat/utils/commands.py +154 -4
servalcat/utils/fileio.py +20 -10
servalcat/utils/hkl.py +43 -29
servalcat/utils/logger.py +25 -1
servalcat/utils/maps.py +2 -2
servalcat/utils/model.py +23 -10
servalcat/utils/refmac.py +20 -1
servalcat/utils/restraints.py +34 -25
servalcat/utils/symmetry.py +5 -5
servalcat/xtal/french_wilson.py +39 -31
servalcat/xtal/sigmaa.py +382 -152
servalcat/xtal/twin.py +121 -0
{servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/METADATA +4 -4
servalcat-0.4.99.dist-info/RECORD +45 -0
{servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/WHEEL +1 -1
servalcat-0.4.72.dist-info/RECORD +0 -44
{servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/entry_points.txt +0 -0
{servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/licenses/LICENSE +0 -0

servalcat/utils/refmac.py CHANGED Viewed

@@ -22,7 +22,7 @@ from servalcat.utils import fileio
 re_version = re.compile("#.* Refmac *version ([^ ]+) ")
 re_error = re.compile('(warn|error *[:]|error *==|^error)', re.IGNORECASE)
-re_outlier_start = re.compile("\*\*\*\*.*outliers")
+re_outlier_start = re.compile(r"\*\*\*\*.*outliers")
 def check_version(exe="refmac5"):
     ver = ()
@@ -148,6 +148,8 @@ class FixForRefmac:
         self.MAXNUM = 9999
         self.fixes = []
         self.resn_old_new = []
+        self.res_labels = []
+        self.entities = None
     def fix_before_topology(self, st, topo, fix_microheterogeneity=True, fix_resimax=True, fix_nonpolymer=True, add_gaps=False):
         self.chainids = set(chain.name for chain in st[0])
@@ -385,6 +387,14 @@ class FixForRefmac:
         st.shorten_ccd_codes()
         self.resn_old_new = [x for x in st.shortened_ccd_codes]
+    def store_res_labels(self, st):
+        self.res_labels = []
+        self.entities = gemmi.EntityList(st.entities)
+        for chain in st[0]:
+            self.res_labels.append([])
+            for res in chain:
+                self.res_labels[-1].append((res.subchain, res.entity_id, res.label_seq))
     def fix_model(self, st, changedict):
         chain_newid = set()
         for chain in st[0]:
@@ -411,6 +421,15 @@ class FixForRefmac:
             st.shortened_ccd_codes = self.resn_old_new
             st.restore_full_ccd_codes()
+        if self.res_labels:
+            st.entities = self.entities
+            #print(f"debug {len(self.res_labels)}")
+            #print(f"debug {[x.name for x in st[0]]}")
+            assert len(self.res_labels) == len(st[0])
+            for ic, chain in enumerate(st[0]):
+                assert len(self.res_labels[ic]) == len(chain)
+                for ir, res in enumerate(chain):
+                    res.subchain, res.entity_id, res.label_seq = self.res_labels[ic][ir]
 class Refmac:
     def __init__(self, **kwargs):

servalcat/utils/restraints.py CHANGED Viewed

@@ -10,7 +10,6 @@ from servalcat.utils import logger
 from servalcat.refmac import refmac_keywords
 from servalcat import ext
 import os
-import io
 import gemmi
 import string
 import random
@@ -87,14 +86,13 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
     if cif_files is None:
         cif_files = []
+    monlib = gemmi.MonLib()
     if monomer_dir and not ignore_monomer_dir:
         if not os.path.isdir(monomer_dir):
             raise RuntimeError("not a directory: {}".format(monomer_dir))
         logger.writeln("Reading monomers from {}".format(monomer_dir))
-        monlib = gemmi.read_monomer_lib(monomer_dir, resnames, ignore_missing=True)
-    else:
-        monlib = gemmi.MonLib()
+        monlib.read_monomer_lib(monomer_dir, resnames, logger)
     for f in cif_files:
         logger.writeln("Reading monomer: {}".format(f))
@@ -109,8 +107,8 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
                 # Check if bond length values are included
                 # This is to fail if cif file is e.g. from PDB website
-                if len(atom_id_list) > 1 and not b.find_values("_chem_comp_bond.value_dist"):
-                    raise RuntimeError("{} does not contain bond length value for {}. You need to generate restraints (e.g. using acedrg).".format(f, name))
+                if b.find_values("_chem_comp_bond.comp_id") and not b.find_values("_chem_comp_bond.value_dist"):
+                    raise RuntimeError(f"Bond length information for {name} is missing from {f}. Please generate restraints using a tool like acedrg.")
             for row in b.find("_chem_link.", ["id"]):
                 link_id = row.str(0)
@@ -148,7 +146,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
             logger.writeln("         it is strongly recommended to generate them using AceDRG.")
     if update_old_atom_names:
-        logger.write(monlib.update_old_atom_names(st))
+        monlib.update_old_atom_names(st, logger)
     if params:
         update_torsions(monlib, params.get("restr", {}).get("torsion_include", {}))
@@ -158,6 +156,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
 def fix_elements_in_model(monlib, st):
     monlib_els = {m: {a.id: a.el for a in monlib.monomers[m].atoms} for m in monlib.monomers}
+    lookup = {x.atom: x for x in st[0].all()}
     for chain in st[0]:
         for res in chain:
             d = monlib_els.get(res.name)
@@ -167,7 +166,7 @@ def fix_elements_in_model(monlib, st):
                     continue
                 el = d[at.name]
                 if at.element != el:
-                    logger.writeln(f"WARNING: correcting element of {st[0].get_cra(at)} to {el.name}")
+                    logger.writeln(f"WARNING: correcting element of {lookup[at]} to {el.name}")
                     at.element = el
 # correct_elements_in_model()
@@ -334,10 +333,9 @@ def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_err
         keywords = []
     # these checks can be done after sorting links
     logger.writeln("Creating restraints..")
-    sio = io.StringIO()
-    topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=sio, reorder=False,
-                                  ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
-    for l in sio.getvalue().splitlines(): logger.writeln(" " + l)
+    with logger.with_prefix("  "):
+        topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=logger, reorder=False,
+                                      ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
     unknown_cc = set()
     link_related = set()
     nan_hydr = set()
@@ -521,7 +519,7 @@ def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_f
                                                 cra2.residue, cra2.atom.name, cra2.atom.altloc)
             if link:
                 con.link_id = link.id
-            elif find_metal_links and con.type == gemmi.ConnectionType.MetalC:
+            elif con.type == gemmi.ConnectionType.MetalC:
                 logger.writeln(" Metal link will be added: {} dist= {:.2f}".format(atoms_str, dist))
                 if cra2.atom.element.is_metal:
                     inv = True # make metal first
@@ -587,6 +585,8 @@ def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_f
             logger.writeln(" {}Metal link found: {} dist= {:.2f} max_ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                            atoms_str,
                                                                                            r.dist, max_ideal))
+        else:
+            continue
         n_found += 1
         if not will_be_added: continue
         con = gemmi.Connection()
@@ -653,12 +653,7 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
                       gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid):
             polymers.setdefault(p_type, []).append((chain, rs))
-    scoring = gemmi.AlignmentScoring()
-    scoring.match = 0
-    scoring.mismatch = -1
-    scoring.gapo = 0
-    scoring.gape = -1
+    scoring = gemmi.AlignmentScoring("p") # AlignmentScoring::partial_model
     al_res = []
     ncslist = ext.NcsList()
     for pt in polymers:
@@ -670,19 +665,33 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
                 al = gemmi.align_sequence_to_polymer(q, pols[j][1], pt, scoring)
                 if al.match_count < min_nalign: continue
                 su = gemmi.calculate_superposition(pols[i][1], pols[j][1], pt, gemmi.SupSelect.All)
-                obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1])
+                obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1], pols[i][0].name, pols[j][0].name)
                 obj.calculate_local_rms(rms_loc_nlen)
-                if len(obj.local_rms) == 0: continue
-                ave_local_rms = numpy.mean(obj.local_rms)
+                if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
+                    continue
+                ave_local_rms = numpy.nanmean(obj.local_rms)
                 if ave_local_rms > max_rms_loc: continue
                 ncslist.ncss.append(obj)
-                al_res.append({"chain_1": "{} ({}..{})".format(pols[i][0].name, pols[i][1][0].seqid, pols[i][1][-1].seqid),
-                               "chain_2": "{} ({}..{})".format(pols[j][0].name, pols[j][1][0].seqid, pols[j][1][-1].seqid),
+                al_res.append({"chain_1": "{} ({}..{})".format(obj.chains[0], obj.seqids[0][0], obj.seqids[-1][0]),
+                               "chain_2": "{} ({}..{})".format(obj.chains[1], obj.seqids[0][1], obj.seqids[-1][1]),
                                "aligned": al.match_count,
                                "identity": al.calculate_identity(1),
                                "rms": su.rmsd,
                                "ave(rmsloc)": ave_local_rms,
                                })
+                if al_res[-1]["identity"] < 100:
+                    wrap_width = 100
+                    logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
+                    logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
+                    logger.writeln(f"match_count: {al.match_count} (identity: {al_res[-1]['identity']:.2f})")
+                    s1 = gemmi.one_letter_code(q)
+                    p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
+                    p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
+                    for k in range(0, len(p1), wrap_width):
+                        logger.writeln(" seq1 {}".format(p1[k:k+wrap_width]))
+                        logger.writeln("      {}".format(al.match_string[k:k+wrap_width]))
+                        logger.writeln(" seq2 {}\n".format(p2[k:k+wrap_width]))
     ncslist.set_pairs()
     df = pandas.DataFrame(al_res)
     df.index += 1
@@ -745,7 +754,7 @@ class MetalCoordination:
                     logger.writeln(" (from ener_lib)")
                 else:
                     logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
-                    ideals[el] = [(x["median"], x["mad"]) for x in vals if x["mad"] > 0]
+                    ideals[el] = [(x["median"], max(0.02, x["mad"]*1.5)) for x in vals if x["mad"] > 0]
             logger.writeln("")
             for i, am in enumerate(coords[metal]):
                 logger.writeln("  site {}: {}".format(i+1, lookup[am]))

servalcat/utils/symmetry.py CHANGED Viewed

@@ -55,7 +55,7 @@ def ncsops_from_args(args, cell, map_and_start=None, st=None, helical_min_n=None
         start_xyz = numpy.zeros(3)
     if args.center is None:
-        A = numpy.array(cell.orthogonalization_matrix.tolist())
+        A = cell.orth.mat.array
         center = numpy.sum(A, axis=1) / 2 #+ start_xyz
         logger.writeln("Center: {}".format(center))
     else:
@@ -156,7 +156,7 @@ def show_operators_axis_angle(ops):
 def show_ncs_operators_axis_angle(ops):
     # ops: List of gemmi.NcsOp
     for i, op in enumerate(ops):
-        op2 = numpy.array(op.tr.mat.tolist())
+        op2 = op.tr.mat.array
         ax, ang = generate_operators.Rotation2AxisAngle_general(op2)
         axlab = "[{: .4f}, {: .4f}, {: .4f}]".format(*ax)
         trlab = "[{: 9.4f}, {: 9.4f}, {: 9.4f}]".format(*op.tr.vec.tolist())
@@ -210,7 +210,7 @@ def generate_helical_operators(start_xyz, center, axsym, deltaphi, deltaz, axis1
 def make_NcsOps_from_matrices(matrices, cell=None, center=None):
     if center is None:
-        A = numpy.array(cell.orthogonalization_matrix.tolist())
+        A = cell.orth.mat.array
         center = numpy.sum(A,axis=1) / 2
     center = gemmi.Vec3(*center)
@@ -225,9 +225,9 @@ def make_NcsOps_from_matrices(matrices, cell=None, center=None):
 # make_NcsOps_from_matrices()
 def find_center_of_origin(mat, vec): # may not be unique.
-    tmp = numpy.identity(3) - numpy.array(mat)
+    tmp = numpy.identity(3) - numpy.array(mat.array)
     ret = numpy.dot(numpy.linalg.pinv(tmp), vec.tolist())
-    resid = vec.tolist() - (numpy.dot(mat, -ret) + ret)
+    resid = vec.tolist() - (numpy.dot(mat.array, -ret) + ret)
     return gemmi.Vec3(*ret), gemmi.Vec3(*resid)
 # find_center_of_origin()

servalcat/xtal/french_wilson.py CHANGED Viewed

@@ -101,14 +101,14 @@ def determine_Sigma_and_aniso(hkldata):
                 S = hkldata.binned_df.loc[i_bin, "S"]
                 f0 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
                                                 S * hkldata.df.epsilon.to_numpy()[idxes],
-                                                0, hkldata.df.centric.to_numpy()[idxes]+1))
+                                                numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
                 shift = numpy.exp(ll_shift_bin_S(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
                                                  S, hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes]))
                 for k in range(3):
                     ss = shift**(1. / 2**k)
                     f1 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
                                                     S * ss * hkldata.df.epsilon.to_numpy()[idxes],
-                                                    0, hkldata.df.centric.to_numpy()[idxes]+1))
+                                                    numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
                     #logger.writeln("bin {:3d} f0 = {:.3e} shift = {:.3e} df = {:.3e}".format(i_bin, f0, ss, f1 - f0))
                     if f1 < f0:
                         hkldata.binned_df.loc[i_bin, "S"] = S * ss
@@ -145,7 +145,7 @@ def ll_all_B(x, ssqmat, hkldata, adpdirs):
     for i_bin, idxes in hkldata.binned():
         ret += numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
                                           hkldata.binned_df.S[i_bin] * hkldata.df.epsilon.to_numpy()[idxes],
-                                          0, hkldata.df.centric.to_numpy()[idxes]+1))
+                                          numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
     return ret
 def ll_shift_bin_S(Io, sigIo, k_ani, S, c, eps, exp_trans=True):
@@ -174,52 +174,57 @@ def ll_shift_B(x, ssqmat, hkldata, adpdirs):
     g, H = numpy.dot(g, adpdirs.T), numpy.dot(adpdirs, numpy.dot(H, adpdirs.T))
     return -numpy.dot(g, numpy.linalg.pinv(H))
+def expected_F_from_int(Io, sigo, k_ani, eps, c, S):
+    to = Io / sigo - sigo / c / k_ani**2 / S / eps
+    tf = numpy.zeros(Io.size)
+    sig1 = numpy.ones(Io.size)
+    k_num = numpy.where(c == 1,  0.5, 0.)
+    F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, tf, sig1, c,
+                                             integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
+    Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, tf, sig1, c,
+                                   integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
+    varF = Fsq - F**2
+    return F, numpy.sqrt(varF)
 def french_wilson(hkldata, B_aniso, labout=None):
     if labout is None: labout = ["F", "SIGF"]
-    hkldata.df[labout[0]] = numpy.nan
-    hkldata.df[labout[1]] = numpy.nan
-    hkldata.df["to1"] = numpy.nan
     k_ani = hkldata.debye_waller_factors(b_cart=B_aniso)
+    has_ano = "I(+)" in hkldata.df and "I(-)" in hkldata.df
+    if has_ano:
+        ano_data = hkldata.df[["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]].to_numpy()
+        if len(labout) == 2:
+            labout += [f"{labout[0]}(+)", f"{labout[1]}(+)", f"{labout[0]}(-)", f"{labout[1]}(-)"]
+    hkldata.df[labout] = numpy.nan
     for i_bin, idxes in hkldata.binned():
         S = hkldata.binned_df.S[i_bin]
         c = hkldata.df.centric.to_numpy()[idxes] + 1 # 1 for acentric, 2 for centric
         Io = hkldata.df.I.to_numpy()[idxes]
         sigo = hkldata.df.SIGI.to_numpy()[idxes]
         eps = hkldata.df.epsilon.to_numpy()[idxes]
-        to = Io / sigo - sigo / c / k_ani[idxes]**2 / S / eps
-        k_num = numpy.where(c == 1,  0.5, 0.)
-        F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, 0., 1., c,
-                                                 integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
-        Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, 0., 1., c,
-                                       integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
-        varF = Fsq - F**2
+        F, sigF = expected_F_from_int(Io, sigo, k_ani[idxes], eps, c, S)
         hkldata.df.loc[idxes, labout[0]] = F
-        hkldata.df.loc[idxes, labout[1]] = numpy.sqrt(varF)
-        hkldata.df.loc[idxes, "to1"] = to
+        hkldata.df.loc[idxes, labout[1]] = sigF
+        if has_ano:
+            Fp, sigFp = expected_F_from_int(ano_data[idxes,0], ano_data[idxes,1], k_ani[idxes], eps, c, S)
+            Fm, sigFm = expected_F_from_int(ano_data[idxes,2], ano_data[idxes,3], k_ani[idxes], eps, c, S)
+            hkldata.df.loc[idxes, labout[2]] = Fp
+            hkldata.df.loc[idxes, labout[3]] = sigFp
+            hkldata.df.loc[idxes, labout[4]] = Fm
+            hkldata.df.loc[idxes, labout[5]] = sigFm
 def main(args):
     if not args.output_prefix:
         args.output_prefix = utils.fileio.splitext(os.path.basename(args.hklin))[0] + "_fw"
+    try:
+        mtz = utils.fileio.read_mmhkl(args.hklin, cif_index=args.hklin_index)
+    except RuntimeError as e:
+        raise SystemExit("Error: {}".format(e))
     if not args.labin:
-        try:
-            mtz = utils.fileio.read_mmhkl(args.hklin, cif_index=args.hklin_index)
-        except RuntimeError as e:
-            raise SystemExit("Error: {}".format(e))
-        dlabs = utils.hkl.mtz_find_data_columns(mtz)
-        if dlabs["J"]:
-            labin = dlabs["J"][0]
-        else:
-            raise SystemExit("Intensity not found from mtz")
-        flabs = utils.hkl.mtz_find_free_columns(mtz)
-        if flabs:
-            labin += [flabs[0]]
-        logger.writeln("MTZ columns automatically selected: {}".format(labin))
+        labin = sigmaa.decide_mtz_labels(mtz, require=("K", "J"))
     else:
         labin = args.labin.split(",")
     try:
-        hkldata, _, _, _, _ = sigmaa.process_input(hklin=args.hklin,
+        hkldata, _, _, _, _ = sigmaa.process_input(hklin=mtz,
                                                    labin=labin,
                                                    n_bins=args.nbins,
                                                    free=None,
@@ -237,6 +242,9 @@ def main(args):
     mtz_out = args.output_prefix+".mtz"
     lab_out = ["F", "SIGF", "I", "SIGI"]
     labo_types = {"F":"F", "SIGF":"Q", "I":"J", "SIGI":"Q"}
+    if "I(+)" in hkldata.df and "I(-)" in hkldata.df:
+        lab_out += ["F(+)", "SIGF(+)", "F(-)", "SIGF(-)"]
+        labo_types.update({"F(+)":"G", "SIGF(+)":"L", "F(-)":"G", "SIGF(-)":"L"})
     if len(labin) == 3:
         lab_out.append("FREE")
         labo_types[lab_out[-1]] = "I"