PyPI - servalcat - Versions diffs - 0.4.39__cp39-cp39-win_amd64.whl → 0.4.60__cp39-cp39-win_amd64.whl - Mend

servalcat 0.4.39__cp39-cp39-win_amd64.whl → 0.4.60__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of servalcat might be problematic. Click here for more details.

Files changed (31)

servalcat/__init__.py +2 -2
servalcat/ext.cp39-win_amd64.pyd +0 -0
servalcat/refine/refine.py +265 -45
servalcat/refine/refine_geom.py +68 -40
servalcat/refine/refine_spa.py +10 -2
servalcat/refine/refine_xtal.py +25 -6
servalcat/refine/spa.py +4 -3
servalcat/refine/xtal.py +10 -8
servalcat/refmac/exte.py +11 -7
servalcat/refmac/refmac_keywords.py +106 -87
servalcat/refmac/refmac_wrapper.py +76 -15
servalcat/spa/fofc.py +7 -4
servalcat/spa/run_refmac.py +19 -14
servalcat/utils/commands.py +45 -22
servalcat/utils/fileio.py +37 -36
servalcat/utils/generate_operators.py +2 -2
servalcat/utils/hkl.py +20 -5
servalcat/utils/model.py +7 -10
servalcat/utils/refmac.py +20 -7
servalcat/utils/restraints.py +119 -9
servalcat/xtal/run_refmac_small.py +55 -63
servalcat/xtal/sigmaa.py +112 -64
servalcat-0.4.60.dist-info/METADATA +56 -0
servalcat-0.4.60.dist-info/RECORD +44 -0
{servalcat-0.4.39.dist-info → servalcat-0.4.60.dist-info}/WHEEL +1 -1
{servalcat-0.4.39.dist-info → servalcat-0.4.60.dist-info}/entry_points.txt +2 -1
servalcat-0.4.39.dist-info/METADATA +0 -16
servalcat-0.4.39.dist-info/RECORD +0 -45
servalcat-0.4.39.dist-info/top_level.txt +0 -1
/servalcat/{command_line.py → __main__.py} +0 -0
{servalcat-0.4.39.dist-info → servalcat-0.4.60.dist-info/licenses}/LICENSE +0 -0

servalcat/__init__.py CHANGED

@@ -6,5 +6,5 @@ This software is released under the
 Mozilla Public License, version 2.0; see LICENSE.
 """
-__version__ = '0.4.39'
-__date__ = '2023-11-02'
+__version__ = '0.4.60'
+__date__ = '2024-02-26'

servalcat/ext.cp39-win_amd64.pyd CHANGED

Binary file

servalcat/refine/refine.py CHANGED

@@ -29,12 +29,18 @@ b_to_u = utils.model.b_to_u
 class Geom:
     def __init__(self, st, topo, monlib, adpr_w=1, shake_rms=0,
-                 refmac_keywords=None, unrestrained=False, use_nucleus=False):
+                 refmac_keywords=None, unrestrained=False, use_nucleus=False,
+                 ncslist=None, atom_pos=None):
         self.st = st
         self.atoms = [None for _ in range(self.st[0].count_atom_sites())]
         for cra in self.st[0].all(): self.atoms[cra.atom.serial-1] = cra.atom
+        if atom_pos is not None:
+            self.atom_pos = atom_pos
+        else:
+            self.atom_pos = list(range(len(self.atoms)))
+        self.n_refine_atoms = max(self.atom_pos) + 1
         self.lookup = {x.atom: x for x in self.st[0].all()}
-        self.geom = ext.Geometry(self.st, monlib.ener_lib)
+        self.geom = ext.Geometry(self.st, self.atom_pos, monlib.ener_lib)
         self.specs = utils.model.find_special_positions(self.st)
         #cs_count = len(self.st.find_spacegroup().operations())
         for atom, images, matp, mata in self.specs:
@@ -55,13 +61,17 @@ class Geom:
         if refmac_keywords:
             exte.read_external_restraints(refmac_keywords, self.st, self.geom)
             kwds = parse_keywords(refmac_keywords)
-            for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw"):
+            for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
                 if k in kwds:
                     self.calc_kwds[k] = kwds[k]
                     logger.writeln("setting geometry weight {}= {}".format(k, kwds[k]))
+            self.group_occ = GroupOccupancy(self.st, kwds.get("occu"))
+        else:
+            self.group_occ = GroupOccupancy(self.st, None)
         self.geom.finalize_restraints()
-        self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
+        self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, ncs=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
         self.parents = {}
+        self.ncslist = ncslist
     # __init__()
     def check_chemtypes(self, enerlib_path, topo):
@@ -87,7 +97,9 @@ class Geom:
     # set_h_parents()
     def setup_nonbonded(self, refine_xyz):
         skip_critical_dist = not refine_xyz or self.unrestrained
-        self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist)
+        self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist, group_idxes=self.group_occ.group_idxes)
+        if self.ncslist:
+            self.geom.setup_ncsr(self.ncslist)
     def calc(self, target_only):
         return self.geom.calc(check_only=target_only, **self.calc_kwds)
     def calc_adp_restraint(self, target_only):
@@ -116,6 +128,7 @@ class Geom:
                              staca=self.geom.reporting.get_stacking_angle_outliers,
                              stacd=self.geom.reporting.get_stacking_dist_outliers,
                              vdw=self.geom.reporting.get_vdw_outliers,
+                             #ncs=self.geom.reporting.get_ncsr_outliers, # not useful?
                              )
             labs = dict(bond="Bond distances",
                         angle="Bond angles",
@@ -124,7 +137,8 @@ class Geom:
                         plane="Planar groups",
                         staca="Stacking plane angles",
                         stacd="Stacking plane distances",
-                        vdw="VDW repulsions")
+                        vdw="VDW repulsions",
+                        ncs="Local NCS restraints")
             for k in get_table:
                 kwgs = {"min_z": self.outlier_sigmas[k]}
@@ -132,7 +146,7 @@ class Geom:
                 table = get_table[k](**kwgs)
                 if table["z"]:
                     for kk in table:
-                        if kk.startswith(("atom", "plane")):
+                        if kk.startswith(("atom", "plane", "1_atom", "2_atom")):
                             table[kk] = [str(self.lookup[x]) for x in table[kk]]
                     df = pandas.DataFrame(table)
                     df = df.reindex(df.z.abs().sort_values(ascending=False).index)
@@ -183,8 +197,187 @@ def show_binstats(df, cycle_number):
     logger.writeln(lstr)
 # show_binstats()
+class GroupOccupancy:
+    # TODO max may not be one. should check multiplicity
+    def __init__(self, st, params):
+        self.groups = []
+        self.consts = []
+        self.group_idxes = [0 for _ in range(st[0].count_atom_sites())]
+        self.ncycle = 0
+        if not params or not params.get("groups"):
+            return
+        logger.writeln("Occupancy groups:")
+        self.atom_pos = [-1 for _ in range(st[0].count_atom_sites())]
+        count = 0
+        for igr in params["groups"]:
+            self.groups.append([[], []]) # list of [indexes, atoms]
+            n_curr = count
+            for sel in params["groups"][igr]:
+                sel_chains = sel.get("chains")
+                sel_from = sel.get("resi_from")
+                sel_to = sel.get("resi_to")
+                sel_seq = sel.get("resi")
+                sel_atom = sel.get("atom")
+                sel_alt = sel.get("alt")
+                for chain in st[0]:
+                    if sel_chains and chain.name not in sel_chains:
+                        continue
+                    flag = False
+                    for res in chain:
+                        if sel_seq and res.seqid != sel_seq:
+                            continue
+                        if sel_from and res.seqid == sel_from:
+                            flag = True
+                        if sel_from and not flag:
+                            continue
+                        for atom in res:
+                            if sel_atom and atom.name != sel_atom:
+                                continue
+                            if sel_alt and atom.altloc != sel_alt:
+                                continue
+                            self.atom_pos[atom.serial-1] = count
+                            self.groups[-1][0].append(count)
+                            self.groups[-1][1].append(atom)
+                            self.group_idxes[atom.serial-1] = len(self.groups)
+                            count += 1
+                        if sel_to and res.seqid == sel_to:
+                            flag = False
+            logger.writeln(" id= {} atoms= {}".format(igr, count - n_curr))
+        igr_idxes = {igr:i for i, igr in enumerate(params["groups"])}
+        self.consts = [(is_comp, [igr_idxes[g] for g in gids])
+                       for is_comp, gids in params["const"]]
+        self.ncycle = params.get("ncycle", 5)
+    # __init__()
+    def constraint(self, x):
+        # x: occupancy parameters
+        ret = []
+        for is_comp, ids in self.consts:
+            x_sum = numpy.sum(x[ids])
+            if is_comp or x_sum > 1:
+                ret.append(x_sum - 1)
+            else:
+                ret.append(0.)
+        return numpy.array(ret)
+    def ensure_constraints(self):
+        vals = []
+        for _, atoms in self.groups:
+            occ = numpy.mean([a.occ for a in atoms])
+            vals.append(occ)
+        for is_comp, idxes in self.consts:
+            sum_occ = sum(vals[i] for i in idxes)
+            if not is_comp and sum_occ < 1:
+                sum_occ = 1. # do nothing
+            for i in idxes:
+                #logger.writeln("Imposing constraints: {} {}".format(vals[i], vals[i]/sum_occ))
+                vals[i] /= sum_occ
+        for occ, (_, atoms) in zip(vals, self.groups):
+            for a in atoms: a.occ = occ
+    def get_x(self):
+        return numpy.array([atoms[0].occ for _, atoms in self.groups])
+    def set_x(self, x):
+        for p, (_, atoms) in zip(x, self.groups):
+            for a in atoms:
+                a.occ = p
+    def target(self, x, ll, ls, u):
+        self.set_x(x)
+        ll.update_fc()
+        c = self.constraint(x)
+        f = ll.calc_target() - numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)
+        return f
+    def grad(self, x, ll, ls, u, refine_h):
+        c = self.constraint(x)
+        ll.calc_grad(self.atom_pos, refine_xyz=False, adp_mode=0, refine_occ=True, refine_h=refine_h, specs=None)
+        #print("grad=", ll.ll.vn)
+        #print("diag=", ll.ll.am)
+        assert len(ll.ll.vn) == len(ll.ll.am)
+        vn = []
+        diag = []
+        for idxes, atoms in self.groups:
+            if not refine_h:
+                idxes = [i for i, a in zip(idxes, atoms) if not a.is_hydrogen()]
+            vn.append(numpy.sum(numpy.array(ll.ll.vn)[idxes]))
+            diag.append(numpy.sum(numpy.array(ll.ll.am)[idxes]))
+        vn, diag = numpy.array(vn), numpy.array(diag)
+        for i, (is_comp, idxes) in enumerate(self.consts):
+            dcdx = numpy.zeros(len(self.groups))
+            dcdx[idxes] = 1.
+            if is_comp or c[i] != 0:
+                vn -= (ls[i] - u * c[i]) * dcdx
+            diag += u * dcdx**2
+        return vn, diag
+    def refine(self, ll, refine_h, alpha=1.1):
+        # Refinement of grouped occupancies using augmented Lagrangian
+        # f(x) = LL(x) - sum_j (lambda_j c_j(x)) + u/2 sum_j (c_j(x))^2
+        # with c_j(x) = 0 constraints
+        if not self.groups:
+            return
+        logger.writeln("\n== Group occupancy refinement ==")
+        self.ensure_constraints() # make sure constrained groups have the same occupancies.
+        ls = 0 * numpy.ones(len(self.consts)) # Lagrange multiplier
+        u = 10000. # penalty parameter. in Refmac 1/0.01**2
+        x0 = self.get_x()
+        #logger.writeln("  parameters: {}".format(len(x0)))
+        f0 = self.target(x0, ll, ls, u)
+        ret = []
+        for cyc in range(self.ncycle):
+            ret.append({"Ncyc": cyc+1, "f0": f0})
+            logger.writeln("occ_{}_f0= {:.4e}".format(cyc, f0))
+            vn, diag = self.grad(x0, ll, ls, u, refine_h)
+            diag[diag < 1e-6] = 1.
+            dx = -vn / diag
+            if 0:
+                ofs = open("debug.dat", "w")
+                for scale in (-1, -0.5, 0, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 2):
+                    self.set_x(x0 + scale * dx)
+                    ll.update_fc()
+                    c = self.constraint(x0 + dx)
+                    f = ll.calc_target() + numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)
+                    ofs.write("{} {}\n".format(scale, f))
+                ofs.close()
+                import scipy.optimize
+                print(scipy.optimize.line_search(f=lambda x: self.target(x, ll, ls, u),
+                                                 myfprime= lambda x: self.grad(ll, ls, u, refine_h)[0],
+                                                 xk= x0,
+                                                 pk= dx))
+                quit()
+            scale = 1
+            for i in range(3):
+                scale = 1/2**i
+                f1 = self.target(x0 + dx * scale, ll, ls, u)
+                logger.writeln("occ_{}_f1, {}= {:.4e}".format(cyc, i, f1))
+                if f1 < f0: break
+            else:
+                logger.writeln("WARNING: function not minimised")
+                #self.set_x(x0) # Refmac accepts it even when function increases
+            c = self.constraint(x0 + dx * scale)
+            ret[-1]["f1"] = f1
+            ret[-1]["shift_scale"] = scale
+            f0 = f1
+            x0 = x0 + dx * scale
+            ls -= u * c
+            u = alpha * u
+            ret[-1]["const_viol"] = list(c)
+            ret[-1]["lambda_new"] = list(ls)
+        self.ensure_constraints()
+        ll.update_fc()
+        f = ll.calc_target()
+        logger.writeln("final -LL= {}".format(f))
+        return ret
 class Refine:
-    def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, unrestrained=False):
+    def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
+                 unrestrained=False, refmac_keywords=None):
         assert adp_mode in (0, 1, 2) # 0=fix, 1=iso, 2=aniso
         assert geom is not None
         self.st = st # clone()?
@@ -194,11 +387,13 @@ class Refine:
         self.gamma = 0
         self.adp_mode = 0 if self.ll is None else adp_mode
         self.refine_xyz = refine_xyz
+        self.refine_occ = refine_occ
         self.unrestrained = unrestrained
         self.refine_h = refine_h
         self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
         if self.h_inherit_parent_adp:
             self.geom.set_h_parents()
+        assert self.geom.group_occ.groups or self.n_params() > 0
     # __init__()
     def print_weights(self): # TODO unfinished
@@ -216,7 +411,7 @@ class Refine:
                 raise LookupError("unknown adpr_mode")
     def scale_shifts(self, dx, scale):
-        n_atoms = len(self.atoms)
+        n_atoms = self.geom.n_refine_atoms
         #ave_shift = numpy.mean(dx)
         #max_shift = numpy.maximum(dx)
         #rms_shift = numpy.std(dx)
@@ -224,19 +419,31 @@ class Refine:
         shift_allow_low  = -1.0
         shift_max_allow_B = 30.0
         shift_min_allow_B = -30.0
+        shift_max_allow_q = 0.5
+        shift_min_allow_q = -0.5
         dx = scale * dx
-        offset_b = 0
+        offset_b = n_atoms * 3 if self.refine_xyz else 0
+        offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
         if self.refine_xyz:
-            dxx = dx[:n_atoms*3]
+            dxx = dx[:offset_b]
+            logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
+            logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
+            logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
             dxx[dxx > shift_allow_high] = shift_allow_high
             dxx[dxx < shift_allow_low] = shift_allow_low
-            offset_b = n_atoms*3
         if self.adp_mode == 1:
-            dxb = dx[offset_b:]
+            dxb = dx[offset_b:offset_q]
+            logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
+            logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
+            logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
             dxb[dxb > shift_max_allow_B] = shift_max_allow_B
             dxb[dxb < shift_min_allow_B] = shift_min_allow_B
         elif self.adp_mode == 2:
-            dxb = dx[offset_b:]
+            dxb = dx[offset_b:offset_q]
+            # TODO this is misleading
+            logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
+            logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
+            logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
             for i in range(len(dxb)//6):
                 j = i * 6
                 a = numpy.array([[dxb[j],   dxb[j+3], dxb[j+4]],
@@ -247,29 +454,43 @@ class Refine:
                 v[v < shift_min_allow_B] = shift_min_allow_B
                 a = Q.dot(numpy.diag(v)).dot(Q.T)
                 dxb[j:j+6] = a[0,0], a[1,1], a[2,2], a[0,1], a[0,2], a[1,2]
+        if self.refine_occ:
+            dxq = dx[offset_q:]
+            logger.writeln("min(dq) = {}".format(numpy.min(dxq)))
+            logger.writeln("max(dq) = {}".format(numpy.max(dxq)))
+            logger.writeln("mean(dq)= {}".format(numpy.mean(dxq)))
+            dxq[dxq > shift_max_allow_q] = shift_max_allow_q
+            dxq[dxq < shift_min_allow_q] = shift_min_allow_q
         return dx
     def n_params(self):
-        n_atoms = len(self.atoms)
+        n_atoms = self.geom.n_refine_atoms
         n_params = 0
         if self.refine_xyz: n_params += 3 * n_atoms
         if self.adp_mode == 1:
             n_params += n_atoms
         elif self.adp_mode == 2:
             n_params += 6 * n_atoms
+        if self.refine_occ:
+            n_params += n_atoms
         return n_params
     def set_x(self, x):
-        n_atoms = len(self.atoms)
+        n_atoms = self.geom.n_refine_atoms
         offset_b = n_atoms * 3 if self.refine_xyz else 0
-        for i in range(len(self.atoms)):
+        offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
+        max_occ = {}
+        if self.refine_occ and self.geom.specs:
+            max_occ = {atom: 1./(len(images)+1) for atom, images, _, _ in self.geom.specs}
+        for i, j in enumerate(self.geom.atom_pos):
+            if j < 0: continue
             if self.refine_xyz:
-                self.atoms[i].pos.fromlist(x[3*i:3*i+3]) # faster than substituting pos.x,pos.y,pos.z
+                self.atoms[i].pos.fromlist(x[3*j:3*j+3]) # faster than substituting pos.x,pos.y,pos.z
             if self.adp_mode == 1:
-                self.atoms[i].b_iso = max(0.5, x[offset_b + i]) # minimum B = 0.5
+                self.atoms[i].b_iso = max(0.5, x[offset_b + j]) # minimum B = 0.5
             elif self.adp_mode == 2:
-                a = x[offset_b + 6 * i: offset_b + 6 * (i+1)]
+                a = x[offset_b + 6 * j: offset_b + 6 * (j+1)]
                 a = gemmi.SMat33d(*a)
                 M = numpy.array(a.as_mat33())
                 v, Q = numpy.linalg.eigh(M) # eig() may return complex due to numerical precision?
@@ -278,6 +499,8 @@ class Refine:
                 self.atoms[i].b_iso = M2.trace() / 3
                 M2 *= b_to_u
                 self.atoms[i].aniso = gemmi.SMat33f(M2[0,0], M2[1,1], M2[2,2], M2[0,1], M2[0,2], M2[1,2])
+            if self.refine_occ:
+                self.atoms[i].occ = min(max_occ.get(self.atoms[i], 1), max(1e-3, x[offset_q + j]))
         # Copy B of hydrogen from parent
         if self.h_inherit_parent_adp:
@@ -290,21 +513,26 @@ class Refine:
             self.ll.update_fc()
         self.geom.setup_nonbonded(self.refine_xyz) # if refine_xyz=False, no need to do it every time
-        self.geom.geom.setup_target(self.refine_xyz, self.adp_mode)
+        self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
         logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
     def get_x(self):
-        n_atoms = len(self.atoms)
+        n_atoms = self.geom.n_refine_atoms
         offset_b = n_atoms * 3 if self.refine_xyz else 0
+        offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
         x = numpy.zeros(self.n_params())
-        for i, a in enumerate(self.atoms):
+        for i, j in enumerate(self.geom.atom_pos):
+            if j < 0: continue
+            a = self.atoms[i]
             if self.refine_xyz:
-                x[3*i:3*(i+1)] = a.pos.tolist()
+                x[3*j:3*(j+1)] = a.pos.tolist()
             if self.adp_mode == 1:
-                x[offset_b + i] = self.atoms[i].b_iso
+                x[offset_b + j] = self.atoms[i].b_iso
             elif self.adp_mode == 2:
-                x[offset_b + 6*i : offset_b + 6*(i+1)] = self.atoms[i].aniso.elements_pdb()
-                x[offset_b + 6*i : offset_b + 6*(i+1)] *= u_to_b
+                x[offset_b + 6*j : offset_b + 6*(j+1)] = self.atoms[i].aniso.elements_pdb()
+                x[offset_b + 6*j : offset_b + 6*(j+1)] *= u_to_b
+            if self.refine_occ:
+                x[offset_q + j] = a.occ
         return x
     #@profile
@@ -317,7 +545,8 @@ class Refine:
             ll = self.ll.calc_target()
             logger.writeln(" ll= {}".format(ll))
             if not target_only:
-                self.ll.calc_grad(self.refine_xyz, self.adp_mode, self.refine_h, self.geom.geom.specials)
+                self.ll.calc_grad(self.geom.atom_pos, self.refine_xyz, self.adp_mode, self.refine_occ,
+                                  self.refine_h, self.geom.geom.specials)
         else:
             ll = 0
@@ -372,19 +601,6 @@ class Refine:
             M = scipy.sparse.diags(rdiag)
             dx, self.gamma = cgsolve.cgsolve_rm(A=am, v=vn, M=M, gamma=self.gamma)
-        if self.refine_xyz:
-            dxx = dx[:len(self.atoms)*3]
-            #logger.writeln("dx = {}".format(dxx))
-            logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
-            logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
-            logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
-        if self.adp_mode > 0: # TODO for aniso
-            db = dx[len(self.atoms)*3 if self.refine_xyz else 0:]
-            #logger.writeln("dB = {}".format(db))
-            logger.writeln("min(dB) = {}".format(numpy.min(db)))
-            logger.writeln("max(dB) = {}".format(numpy.max(db)))
-            logger.writeln("mean(dB)= {}".format(numpy.mean(db)))
         if 0: # to check hessian scale
             with open("minimise_line.dat", "w") as ofs:
                 ofs.write("s f\n")
@@ -415,7 +631,7 @@ class Refine:
         self.print_weights()
         stats = [{"Ncyc": 0}]
         self.geom.setup_nonbonded(self.refine_xyz)
-        self.geom.geom.setup_target(self.refine_xyz, self.adp_mode)
+        self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
         logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
         if self.refine_xyz and not self.unrestrained:
             stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=True)["summary"]
@@ -429,11 +645,15 @@ class Refine:
             show_binstats(llstats["bin_stats"], 0)
         if self.adp_mode > 0:
             utils.model.adp_analysis(self.st)
+        occ_refine_flag = self.ll is not None and self.geom.group_occ.groups and self.geom.group_occ.ncycle > 0
         for i in range(ncycles):
             logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
-            is_ok, shift_scale, fval = self.run_cycle(weight=weight)
-            stats.append({"Ncyc": i+1, "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok})
+            if self.refine_xyz or self.adp_mode > 0:
+                is_ok, shift_scale, fval = self.run_cycle(weight=weight)
+                stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok})
+            if occ_refine_flag:
+                stats[-1]["occ_refine"] = self.geom.group_occ.refine(self.ll, self.refine_h)
             if debug: utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
             if self.refine_xyz and not self.unrestrained:
                 stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=(i==ncycles-1))["summary"]

servalcat/refine/refine_geom.py CHANGED

@@ -35,11 +35,14 @@ def add_arguments(parser):
                         help='Automatically add links')
     parser.add_argument('--randomize', type=float, default=0,
                         help='Shake coordinates with specified rmsd')
+    parser.add_argument('--ncsr', action='store_true',
+                        help='Use local NCS restraints')
     parser.add_argument('--keywords', nargs='+', action="append",
                         help="refmac keyword(s)")
     parser.add_argument('--keyword_file', nargs='+', action="append",
                         help="refmac keyword file(s)")
-    parser.add_argument('-o','--output_prefix')
+    parser.add_argument('-o','--output_prefix',
+                        help="Output prefix")
 # add_arguments()
@@ -49,6 +52,29 @@ def parse_args(arg_list):
     return parser.parse_args(arg_list)
 # parse_args()
+def add_program_info_to_dictionary(block, comp_id, program_name="servalcat", descriptor="optimization tool"):
+    tab = block.find("_pdbx_chem_comp_description_generator.", ["program_name", "program_version", "descriptor"])
+    # just overwrite version if it's there
+    for row in tab:
+        if row.str(0) == program_name and row.str(2) == descriptor:
+            row[1] = gemmi.cif.quote(servalcat.__version__)
+            return
+    loop = tab.loop
+    if not loop:
+        loop = block.init_loop("_pdbx_chem_comp_description_generator.", ["comp_id",
+                                                                          "program_name",
+                                                                          "program_version",
+                                                                          "descriptor"])
+    tags = [x[x.index(".")+1:] for x in loop.tags]
+    row = ["" for _ in range(len(tags))]
+    for tag, val in (("comp_id", comp_id),
+                     ("program_name", program_name),
+                     ("program_version", servalcat.__version__),
+                     ("descriptor", descriptor)):
+        if tag in tags: row[tags.index(tag)] = val
+    loop.add_row(gemmi.cif.quote_list(row))
+# add_program_info_to_dictionary()
 def refine_and_update_dictionary(cif_in, monomer_dir, output_prefix, randomize=0, ncycle1=10, ncycle2=30):
     doc = gemmi.cif.read(cif_in)
     for block in doc: # this block will be reused below
@@ -59,55 +85,52 @@ def refine_and_update_dictionary(cif_in, monomer_dir, output_prefix, randomize=0
     monlib = utils.restraints.load_monomer_library(st, monomer_dir=monomer_dir, # monlib is needed for ener_lib
                                                    cif_files=[cif_in],
                                                    stop_for_unknowns=True)
-    try:
-        topo, _ = utils.restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.ReAdd,
-                                                    check_hydrogen=False)
-    except RuntimeError as e:
-        raise SystemExit("Error: {}".format(e))
+    all_stats = []
+    for i_macro in 0, 1:
+        try:
+            topo, _ = utils.restraints.prepare_topology(st, monlib, h_change=[gemmi.HydrogenChange.Remove, gemmi.HydrogenChange.ReAdd][i_macro],
+                                                        check_hydrogen=(i_macro == 1))
+        except RuntimeError as e:
+            raise SystemExit("Error: {}".format(e))
-    geom = Geom(st, topo, monlib, shake_rms=randomize)
-    refiner = Refine(st, geom)
-    logger.writeln("Running {} cycles with wchir=4 wvdw=2".format(ncycle1))
-    geom.calc_kwds["wchir"] = 4
-    geom.calc_kwds["wvdw"] = 2
-    refiner.run_cycles(ncycle1)
+        geom = Geom(st, topo, monlib, shake_rms=randomize)
+        refiner = Refine(st, geom)
+        logger.writeln("Running {} cycles with wchir=4 wvdw=2 {} hydrogen".format(ncycle1, ["without","with"][i_macro]))
+        geom.calc_kwds["wchir"] = 4
+        geom.calc_kwds["wvdw"] = 2
+        all_stats.append(refiner.run_cycles(ncycle1))
-    # re-add hydrogen may help
-    topo = gemmi.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.ReAdd,
-                                  warnings=logger)
-    geom = Geom(st, topo, monlib)
-    refiner = Refine(st, geom)
-    logger.writeln("Running {} cycles with wchir=1 wvdw=2".format(ncycle2))
-    geom.calc_kwds["wchir"] = 1
-    geom.calc_kwds["wvdw"] = 2
-    refiner.run_cycles(ncycle2)
+        logger.writeln("Running {} cycles with wchir=1 wvdw=2 {} hydrogen".format(ncycle2, ["without","with"][i_macro]))
+        geom.calc_kwds["wchir"] = 1
+        geom.calc_kwds["wvdw"] = 2
+        all_stats.append(refiner.run_cycles(ncycle2))
     # replace xyz
     pos = {cra.atom.name: cra.atom.pos.tolist() for cra in refiner.st[0].all()}
-    for row in block.find("_chem_comp_atom.", ["atom_id", "x", "y", "z"]):
+    for row in block.find("_chem_comp_atom.", ["atom_id", "?x", "?y", "?z",
+                                               "?pdbx_model_Cartn_x_ideal",
+                                               "?pdbx_model_Cartn_y_ideal",
+                                               "?pdbx_model_Cartn_z_ideal"]):
         p = pos[row.str(0)]
         for i in range(3):
-            row[i+1] = "{:.3f}".format(p[i])
+            if row.has(i+1):
+                row[i+1] = "{:.3f}".format(p[i])
+            if row.has(i+4):
+                row[i+4] = "{:.3f}".format(p[i])
     # add description
-    loop = block.find_loop("_pdbx_chem_comp_description_generator.comp_id").get_loop()
-    if not loop:
-        loop = block.init_loop("_pdbx_chem_comp_description_generator.", ["comp_id",
-                                                                          "program_name",
-                                                                          "program_version",
-                                                                          "descriptor"])
-    tags = [x[x.index(".")+1:] for x in loop.tags]
-    row = ["" for _ in range(len(tags))]
-    for tag, val in (("comp_id", st[0][0][0].name),
-                     ("program_name", "servalcat"),
-                     ("program_version", servalcat.__version__),
-                     ("descriptor", "optimization tool")):
-        if tag in tags: row[tags.index(tag)] = val
-    loop.add_row(gemmi.cif.quote_list(row))
+    add_program_info_to_dictionary(block, st[0][0][0].name)
     doc.write_file(output_prefix + "_updated.cif", style=gemmi.cif.Style.Aligned)
+    logger.writeln("Updated dictionary saved: {}".format(output_prefix + "_updated.cif"))
+    with open(output_prefix + "_stats.json", "w") as ofs:
+        for stats in all_stats:
+            for s in stats:
+                s["geom"] = s["geom"].to_dict()
+        json.dump(all_stats, ofs, indent=2)
+        logger.writeln("Refinement statistics saved: {}".format(ofs.name))
 # refine_and_update_dictionary()
 def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefix, randomize, refmac_keywords,
-                find_links=False):
+                find_links=False, use_ncsr=False):
     st = utils.fileio.read_structure(model_in)
     utils.model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
     if st.ncs:
@@ -126,7 +149,11 @@ def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefi
     except RuntimeError as e:
         raise SystemExit("Error: {}".format(e))
     refmac_keywords = metal_kws + refmac_keywords
-    geom = Geom(st, topo, monlib, shake_rms=randomize, refmac_keywords=refmac_keywords)
+    if use_ncsr:
+        ncslist = utils.restraints.prepare_ncs_restraints(st)
+    else:
+        ncslist = False
+    geom = Geom(st, topo, monlib, shake_rms=randomize, refmac_keywords=refmac_keywords, ncslist=ncslist)
     refiner = Refine(st, geom)
     stats = refiner.run_cycles(ncycle)
     refiner.st.name = output_prefix
@@ -159,7 +186,8 @@ def main(args):
                     output_prefix=args.output_prefix,
                     randomize=args.randomize,
                     refmac_keywords=keywords,
-                    find_links=args.find_links)
+                    find_links=args.find_links,
+                    use_ncsr=args.ncsr)
     else:
         if not args.output_prefix:
             args.output_prefix = decide_prefix(args.update_dictionary)