PyPI - modelcraft - Versions diffs - 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl - Mend

modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

modelcraft/__init__.py +16 -31
modelcraft/__main__.py +0 -1
modelcraft/arguments.py +35 -7
modelcraft/combine.py +22 -41
modelcraft/contents.py +188 -164
modelcraft/environ.py +0 -7
modelcraft/geometry.py +39 -27
modelcraft/job.py +6 -5
modelcraft/jobs/acedrg.py +2 -0
modelcraft/jobs/buccaneer.py +22 -4
modelcraft/jobs/comit.py +2 -0
modelcraft/jobs/ctruncate.py +3 -1
modelcraft/jobs/emda.py +2 -0
modelcraft/jobs/findwaters.py +2 -0
modelcraft/jobs/freerflag.py +2 -0
modelcraft/jobs/libg.py +2 -0
modelcraft/jobs/molrep.py +2 -0
modelcraft/jobs/nautilus.py +28 -14
modelcraft/jobs/nucleofind.py +88 -0
modelcraft/jobs/parrot.py +13 -2
modelcraft/jobs/phasematch.py +2 -1
modelcraft/jobs/refmac.py +3 -1
modelcraft/jobs/servalcat.py +38 -4
modelcraft/jobs/sheetbend.py +2 -0
modelcraft/modelcraftem.py +49 -6
modelcraft/modelcraftxray.py +90 -42
modelcraft/monlib.py +55 -52
modelcraft/pdbe.py +54 -0
modelcraft/pipeline.py +1 -1
modelcraft/prune.py +69 -0
modelcraft/reflections.py +11 -1
modelcraft/scripts/contents.py +5 -215
modelcraft/scripts/copies.py +26 -17
modelcraft/scripts/modelcraft.py +1 -0
modelcraft/scripts/sidechains.py +141 -0
modelcraft/scripts/validate.py +81 -0
modelcraft/sequence.py +106 -0
modelcraft/solvent.py +42 -113
modelcraft/structure.py +64 -41
modelcraft/tests/ccp4/__init__.py +7 -11
modelcraft/tests/ccp4/test_acedrg.py +2 -0
modelcraft/tests/ccp4/test_arguments.py +3 -0
modelcraft/tests/ccp4/test_buccaneer.py +3 -2
modelcraft/tests/ccp4/test_cell.py +4 -1
modelcraft/tests/ccp4/test_comit.py +2 -0
modelcraft/tests/ccp4/test_contents.py +99 -17
modelcraft/tests/ccp4/test_copies.py +1 -0
modelcraft/tests/ccp4/test_ctruncate.py +2 -0
modelcraft/tests/ccp4/test_findwaters.py +2 -0
modelcraft/tests/ccp4/test_freerflag.py +2 -0
modelcraft/tests/ccp4/test_libg.py +1 -0
modelcraft/tests/ccp4/test_molrep.py +3 -0
modelcraft/tests/ccp4/test_monlib.py +75 -45
modelcraft/tests/ccp4/test_nautilus.py +5 -3
modelcraft/tests/ccp4/test_nucleofind.py +62 -0
modelcraft/tests/ccp4/test_parrot.py +3 -1
modelcraft/tests/ccp4/test_phasematch.py +2 -0
modelcraft/tests/ccp4/test_prune.py +17 -0
modelcraft/tests/ccp4/test_reflections.py +110 -1
modelcraft/tests/ccp4/test_refmac.py +3 -0
modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
modelcraft/tests/ccp4/test_servalcat.py +52 -0
modelcraft/tests/ccp4/test_sheetbend.py +4 -3
modelcraft/tests/ccp4/test_sidechains.py +25 -0
modelcraft/tests/ccp4/test_solvent.py +12 -26
modelcraft/tests/ccp4/test_structure.py +1 -0
modelcraft/tests/ccp4/test_validation.py +19 -0
modelcraft/tests/ccp4/test_xray.py +12 -6
modelcraft/tests/ccpem/test_em.py +3 -0
modelcraft/tests/ccpem/test_emda.py +2 -0
modelcraft/tests/ccpem/test_refmac.py +1 -0
modelcraft/tests/ccpem/test_servalcat.py +4 -3
modelcraft/utils.py +16 -4
modelcraft/validation.py +101 -0
modelcraft-6.0.0.dist-info/METADATA +76 -0
modelcraft-6.0.0.dist-info/RECORD +85 -0
{modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
{modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
modelcraft/coot/prune.py +0 -1085
modelcraft/coot/sidechains.py +0 -68
modelcraft/jobs/acorn.py +0 -114
modelcraft/jobs/coot.py +0 -104
modelcraft/tests/ccp4/test_coot.py +0 -29
modelcraft/tests/ccp4/test_geometry.py +0 -20
modelcraft/tests/unittests/__init__.py +0 -0
modelcraft/tests/unittests/test_reflections.py +0 -101
modelcraft-5.0.2.dist-info/LICENSE +0 -504
modelcraft-5.0.2.dist-info/METADATA +0 -48
modelcraft-5.0.2.dist-info/RECORD +0 -82
{modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0

modelcraft/contents.py CHANGED Viewed

@@ -1,63 +1,43 @@
-import enum
+import abc
+import functools
 import json
-PROTEIN_CODES = {
-    "A": "ALA",
-    "B": "ASX",
-    "C": "CYS",
-    "D": "ASP",
-    "E": "GLU",
-    "F": "PHE",
-    "G": "GLY",
-    "H": "HIS",
-    "I": "ILE",
-    "K": "LYS",
-    "L": "LEU",
-    "M": "MET",
-    "N": "ASN",
-    "O": "PYL",
-    "P": "PRO",
-    "Q": "GLN",
-    "R": "ARG",
-    "S": "SER",
-    "T": "THR",
-    "U": "SEC",
-    "V": "VAL",
-    "W": "TRP",
-    "X": "UNK",
-    "Y": "TYR",
-    "Z": "GLX",
-}
-RNA_CODES = {
-    "A": "A",
-    "C": "C",
-    "G": "G",
-    "I": "I",
-    "U": "U",
-    "X": "N",
-}
-DNA_CODES = {
-    "A": "DA",
-    "C": "DC",
-    "G": "DG",
-    "I": "DI",
-    "T": "DT",
-    "U": "DU",
-    "X": "DN",
-}
-PIR_CODES = {"D1", "DC", "DL", "F1", "N1", "N3", "P1", "RC", "RL", "XX"}
-class PolymerType(enum.Enum):
-    PROTEIN = "PROTEIN"
-    RNA = "RNA"
-    DNA = "DNA"
-class Polymer:
+import math
+from . import pdbe
+from .monlib import MonLib
+from .sequence import PolymerType, sequences_in_file
+BUFFERS = {"12P", "144", "15P", "16D", "1BO", "1PS", "2OS", "3CO", "3NI", "ACA", "ACN"}
+BUFFERS |= {"ACT", "ACY", "AG", "AGC", "AL", "AZI", "B3P", "B7G", "BA", "BCN", "BE7"}
+BUFFERS |= {"BEQ", "BGC", "BMA", "BNG", "BOG", "BR", "BRO", "BTB", "BTC", "BU1", "BU2"}
+BUFFERS |= {"BU3", "C10", "C15", "C8E", "CA", "CAC", "CBM", "CBX", "CCN", "CD", "CE1"}
+BUFFERS |= {"CIT", "CL", "CLO", "CM", "CM5", "CN", "CO", "CPS", "CRY", "CS", "CU"}
+BUFFERS |= {"CU1", "CXE", "CYN", "CYS", "DDQ", "DHD", "DIA", "DIO", "DMF", "DMS", "DMU"}
+BUFFERS |= {"DMX", "DOX", "DPR", "DR6", "DXG", "EDO", "EEE", "EGL", "EOH", "ETF", "F"}
+BUFFERS |= {"FCL", "FCY", "FE", "FE2", "FLO", "FMT", "FRU", "GBL", "GCD", "GLC", "GLO"}
+BUFFERS |= {"GLY", "GOL", "GPX", "HEZ", "HG", "HTG", "HTO", "ICI", "ICT", "IDO", "IDT"}
+BUFFERS |= {"IOD", "IOH", "IPA", "IPH", "JEF", "K", "LAK", "LAT", "LBT", "LDA", "LI"}
+BUFFERS |= {"LMT", "MA4", "MAN", "MG", "MG8", "MHA", "MN", "MN3", "MOH", "MPD", "MPO"}
+BUFFERS |= {"MRD", "MRY", "MTL", "N8E", "NA", "NCO", "NH4", "NHE", "NI", "NO3", "OTE"}
+BUFFERS |= {"P33", "P4C", "PB", "PDO", "PE4", "PE7", "PE8", "PEU", "PG5", "PG6", "PGE"}
+BUFFERS |= {"PGO", "PGQ", "PGR", "PIG", "PIN", "POL", "RB", "SAL", "SBT", "SCN", "SDS"}
+BUFFERS |= {"SO4", "SOR", "SPD", "SPK", "SPM", "SR", "SUC", "SUL", "SYL", "TAR", "TAU"}
+BUFFERS |= {"TBU", "TEP", "TFP", "TLA", "TMA", "TRE", "TRS", "TRT", "UMQ", "UNX", "URE"}
+BUFFERS |= {"XPE", "Y1", "YT3", "ZN", "ZN2"}
+@functools.cache
+def is_buffer(code: str) -> bool:
+    return code.upper() in BUFFERS
+class Component(abc.ABC):
+    @abc.abstractmethod
+    def volume(self, monlib: MonLib):
+        pass
+class Polymer(Component):
     def __init__(
         self,
         sequence: str,
@@ -67,29 +47,54 @@ class Polymer:
     ):
         self.sequence = sequence.upper()
         self.stoichiometry = stoichiometry
-        self.type = polymer_type or guess_sequence_type(self.sequence)
+        self.type = polymer_type or PolymerType.guess(self.sequence)
         self.modifications = modifications or []
-    def __eq__(self, other) -> bool:
-        if isinstance(other, Polymer):
-            return (
-                self.sequence == other.sequence
-                and self.type == other.type
-                and self.modifications == other.modifications
-            )
-        return NotImplemented
+    def __str__(self) -> str:
+        s = f"{self.type.name} with {len(self.sequence)} residues: "
+        if len(self.sequence) > 9:
+            s += f"{self.sequence[:3]}...{self.sequence[-3:]}"
+        else:
+            s += f"{self.sequence:9}"
+        return s
     @classmethod
-    def from_json(cls, component: dict) -> "Polymer":
+    def from_json(cls, component: dict, polymer_type: PolymerType) -> "Polymer":
         return cls(
             sequence=component["sequence"],
             stoichiometry=component.get("stoichiometry"),
+            polymer_type=polymer_type,
             modifications=component.get("modifications"),
         )
+    @classmethod
+    def from_pdbe(cls, mol: dict, polymer_type: PolymerType) -> "Polymer":
+        mod_indices = {}
+        for index, mod in mol["pdb_sequence_indices_with_multiple_residues"].items():
+            code1 = mod["one_letter_code"]
+            code3 = mod["three_letter_code"]
+            if code3 not in ("DA", "DC", "DG", "DT"):
+                key = code1, code3
+                mod_indices.setdefault(key, []).append(index)
+        modifications = []
+        for (code1, code3), indices in mod_indices.items():
+            total = mol["sequence"].count(code1)
+            if code1 == "M" and mol["sequence"][0] == "M":
+                total -= 1
+            if len(indices) >= total:
+                modifications.append(f"{code1}->{code3}")
+            else:
+                modifications.extend(f"{index}->{code3}" for index in indices)
+        return cls(
+            sequence=mol["sequence"],
+            stoichiometry=mol["number_of_copies"],
+            polymer_type=polymer_type,
+            modifications=modifications,
+        )
     @classmethod
     def from_sequence_file(cls, path: str, polymer_type: PolymerType = None):
-        with open(path) as stream:
+        with open(path, encoding="utf-8") as stream:
             contents = stream.read()
             for sequence in sequences_in_file(contents=contents):
                 yield cls(sequence=sequence, polymer_type=polymer_type)
@@ -102,7 +107,7 @@ class Polymer:
         }
     def residue_codes(self, modified: bool = True) -> list:
-        codes = [code1_to_code3(code1, self.type) for code1 in self.sequence]
+        codes = self.type.parse(self.sequence)
         if modified:
             for mod in self.modifications:
                 source, code = mod.split("->")
@@ -118,16 +123,27 @@ class Polymer:
     def is_selenomet(self) -> bool:
         return "M->MSE" in self.modifications
+    def weight(self, monlib: MonLib) -> float:
+        codes = self.residue_codes(modified=False)
+        weight = sum(monlib.weight(code) for code in codes)
+        weight -= monlib.weight("HOH") * (len(codes) - 1)
+        return weight
+    def volume(self, monlib: MonLib) -> float:
+        density = 1.35 if self.type == PolymerType.PROTEIN else 2.0
+        return self.weight(monlib) / (density * 0.602214)
-class Carb:
+class Carb(Component):
     def __init__(self, codes: dict, stoichiometry: int = None):
         self.codes = codes
         self.stoichiometry = stoichiometry
-    def __eq__(self, other) -> bool:
-        if isinstance(other, Carb):
-            return self.codes == other.codes
-        return NotImplemented
+    def __str__(self) -> str:
+        s = "Carb:"
+        for code, count in self.codes.items():
+            s += f" {count}x{code}"
+        return s
     @classmethod
     def from_json(cls, component: dict) -> "Carb":
@@ -136,19 +152,30 @@ class Carb:
             stoichiometry=component.get("stoichiometry"),
         )
+    @classmethod
+    def from_pdbe(cls, mol: dict) -> "Carb":
+        codes = mol["carb_codes"]
+        length = sum(codes.values())
+        stoichiometry = mol["number_of_copies"] // length
+        return cls(codes=codes, stoichiometry=stoichiometry)
     def to_json(self) -> dict:
         return {"codes": self.codes, "stoichiometry": self.stoichiometry}
+    def volume(self, monlib: MonLib) -> float:
+        monomers = sum(self.codes.values())
+        volume = sum(monlib.volume(code) for code in self.codes)
+        volume -= monomers * monlib.volume("HOH")
+        return volume
-class Ligand:
+class Ligand(Component):
     def __init__(self, code: str, stoichiometry: int = None):
         self.code = code
         self.stoichiometry = stoichiometry
-    def __eq__(self, other) -> bool:
-        if isinstance(other, Ligand):
-            return self.code == other.code
-        return NotImplemented
+    def __str__(self) -> str:
+        return f"Ligand: {self.code}"
     @classmethod
     def from_json(cls, component: dict) -> "Ligand":
@@ -157,9 +184,16 @@ class Ligand:
             stoichiometry=component.get("stoichiometry"),
         )
+    @classmethod
+    def from_pdbe(cls, mol: dict) -> "Ligand":
+        return cls(code=mol["chem_comp_ids"][0], stoichiometry=mol["number_of_copies"])
     def to_json(self) -> dict:
         return {"code": self.code, "stoichiometry": self.stoichiometry}
+    def volume(self, monlib: MonLib) -> float:
+        return monlib.volume(self.code)
 class AsuContents:
     def __init__(
@@ -171,7 +205,6 @@ class AsuContents:
         carbs: list = None,
         ligands: list = None,
         buffers: list = None,
-        smiles: dict = None,
     ):
         self.copies = copies
         self.proteins = proteins or []
@@ -180,7 +213,6 @@ class AsuContents:
         self.carbs = carbs or []
         self.ligands = ligands or []
         self.buffers = buffers or []
-        self.smiles = smiles or {}
     @classmethod
     def from_file(cls, path: str) -> "AsuContents":
@@ -190,31 +222,26 @@ class AsuContents:
     @classmethod
     def from_json_file(cls, path: str) -> "AsuContents":
-        contents = cls()
-        with open(path) as stream:
-            contents_json = json.load(stream)
-        contents.copies = contents_json.get("copies")
-        for obj in contents_json.get("proteins") or []:
-            polymer = Polymer.from_json(obj)
-            polymer.type = PolymerType.PROTEIN
-            contents.proteins.append(polymer)
-        for obj in contents_json.get("rnas") or []:
-            polymer = Polymer.from_json(obj)
-            polymer.type = PolymerType.RNA
-            contents.rnas.append(polymer)
-        for obj in contents_json.get("dnas") or []:
-            polymer = Polymer.from_json(obj)
-            polymer.type = PolymerType.DNA
-            contents.dnas.append(polymer)
-        for obj in contents_json.get("carbs") or []:
-            carb = Carb.from_json(obj)
-            contents.carbs.append(carb)
-        for obj in contents_json.get("ligands") or []:
-            ligand = Ligand.from_json(obj)
-            contents.ligands.append(ligand)
-        contents.buffers = contents_json.get("buffers") or []
-        contents.smiles = contents_json.get("smiles") or []
-        return contents
+        with open(path, encoding="utf-8") as stream:
+            contents = json.load(stream)
+        return cls(
+            copies=contents.get("copies"),
+            proteins=[
+                Polymer.from_json(obj, PolymerType.PROTEIN)
+                for obj in contents.get("proteins", [])
+            ],
+            rnas=[
+                Polymer.from_json(obj, PolymerType.RNA)
+                for obj in contents.get("rnas", [])
+            ],
+            dnas=[
+                Polymer.from_json(obj, PolymerType.DNA)
+                for obj in contents.get("dnas", [])
+            ],
+            carbs=[Carb.from_json(obj) for obj in contents.get("carbs", [])],
+            ligands=[Ligand.from_json(obj) for obj in contents.get("ligands", [])],
+            buffers=contents.get("buffers", []),
+        )
     @classmethod
     def from_sequence_file(
@@ -233,20 +260,67 @@ class AsuContents:
         if polymer.type == PolymerType.DNA:
             self.dnas.append(polymer)
+    @classmethod
+    def from_pdbe(cls, entry_id: str) -> "AsuContents":
+        contents = cls(copies=1)
+        for mol in pdbe.molecule_dicts(entry_id):
+            molecule_type = mol["molecule_type"].lower()
+            if "polypeptide" in molecule_type:
+                protein = Polymer.from_pdbe(mol, PolymerType.PROTEIN)
+                contents.proteins.append(protein)
+            elif "polyribonucleotide" in molecule_type:
+                rna = Polymer.from_pdbe(mol, PolymerType.RNA)
+                contents.rnas.append(rna)
+            elif "polydeoxyribonucleotide" in molecule_type:
+                dna = Polymer.from_pdbe(mol, PolymerType.DNA)
+                contents.dnas.append(dna)
+            elif "carbohydrate" in molecule_type:
+                carb = Carb.from_pdbe(mol)
+                contents.carbs.append(carb)
+            elif "bound" in molecule_type:
+                ligand = Ligand.from_pdbe(mol)
+                if is_buffer(ligand.code):
+                    contents.buffers.append(ligand.code)
+                else:
+                    contents.ligands.append(ligand)
+        contents.divide_stoichiometry()
+        return contents
+    def components(self) -> list[Component]:
+        return self.proteins + self.rnas + self.dnas + self.carbs + self.ligands
+    def divide_stoichiometry(self):
+        counts = []
+        for component in self.components():
+            if component.stoichiometry is not None:
+                counts.append(component.stoichiometry)
+        if len(counts) > 0:
+            if len(counts) > 1:
+                divisor = functools.reduce(math.gcd, counts)
+            else:
+                divisor = counts[0]
+            if divisor > 1:
+                self.copies *= divisor
+                for component in self.components():
+                    component.stoichiometry //= divisor
     def monomer_codes(self) -> set:
         codes = set()
         for polymer in self.proteins + self.rnas + self.dnas:
-            codes.update(set(polymer.residue_codes(modified=True)))
+            codes |= set(polymer.residue_codes(modified=True))
         for carb in self.carbs:
-            codes.update(set(carb.codes.keys()))
+            codes |= set(carb.codes.keys())
         for ligand in self.ligands:
             codes.add(ligand.code)
-        codes.update(set(self.buffers))
+        codes |= set(self.buffers)
         return codes
     def is_selenomet(self) -> bool:
         return len(self.proteins) > 0 and all(p.is_selenomet() for p in self.proteins)
+    def volume(self, monlib: MonLib) -> float:
+        return sum(c.volume(monlib) * (c.stoichiometry or 1) for c in self.components())
     def to_json(self) -> list:
         return {
             "copies": self.copies,
@@ -256,68 +330,18 @@ class AsuContents:
             "carbs": [carb.to_json() for carb in self.carbs],
             "ligands": [ligand.to_json() for ligand in self.ligands],
             "buffers": self.buffers,
-            "smiles": self.smiles,
         }
     def write_json_file(self, path: str) -> None:
-        with open(path, "w") as stream:
+        with open(path, "w", encoding="utf-8") as stream:
             json.dump(self.to_json(), stream, indent=2)
     def write_sequence_file(
         self, path: str, types: list = None, line_length: int = 60
     ) -> None:
-        with open(path, "w") as stream:
+        with open(path, "w", encoding="utf-8") as stream:
             for polymer in self.proteins + self.rnas + self.dnas:
                 if types is None or polymer.type in types:
-                    stream.write(f">{polymer.type.value}\n")
+                    stream.write(f">{polymer.type.name}\n")
                     for i in range(0, len(polymer.sequence), line_length):
                         stream.write(polymer.sequence[i : i + line_length] + "\n")
-def code1_to_code3(code1: str, polymer_type: PolymerType) -> str:
-    return {
-        PolymerType.PROTEIN: PROTEIN_CODES.get(code1) or PROTEIN_CODES["X"],
-        PolymerType.RNA: RNA_CODES.get(code1) or RNA_CODES["X"],
-        PolymerType.DNA: DNA_CODES.get(code1) or DNA_CODES["X"],
-    }[polymer_type]
-def guess_sequence_type(sequence: str) -> PolymerType:
-    codes = set(sequence)
-    if "U" in codes:
-        return PolymerType.RNA
-    if codes & set("DEFHIKLMNPQRSVWY"):
-        return PolymerType.PROTEIN
-    if codes == {"A"}:
-        return PolymerType.PROTEIN
-    if codes == {"G"}:
-        return PolymerType.PROTEIN
-    if "T" in codes:
-        return PolymerType.DNA
-    return PolymerType.RNA
-def sequences_in_file(contents: str) -> list:
-    sequence = ""
-    sequences = []
-    skip_line = False
-    skip_lines = False
-    lines = contents.splitlines(keepends=False)
-    for line in lines:
-        if skip_line:
-            skip_line = False
-            continue
-        if line[:1] == ">":
-            if len(sequence) > 0:
-                sequences.append(sequence)
-            sequence = ""
-            if line[1:3] in PIR_CODES and line[3:4] == ";":
-                skip_line = True
-            skip_lines = False
-        elif line[:1] != ";" and not skip_lines:
-            sequence += "".join(c for c in line if c.isalpha())
-            if line[-1:] == "*":
-                skip_lines = True
-    if len(sequence) > 0:
-        sequences.append(sequence)
-    return sequences

modelcraft/environ.py CHANGED Viewed

@@ -6,10 +6,3 @@ def setup_environ():
         if variable not in os.environ:
             raise EnvironmentError(variable + " environment variable not set")
     os.environ["LD_LIBRARY_PATH"] = os.environ["CLIB"]
-    os.environ["COOT_N_THREADS"] = "1"
-    os.environ["GOTO_NUM_THREADS"] = "1"
-    os.environ["MKL_NUM_THREADS"] = "1"
-    os.environ["NUMEXPR_NUM_THREADS"] = "1"
-    os.environ["OMP_NUM_THREADS"] = "1"
-    os.environ["OPENBLAS_NUM_THREADS"] = "1"
-    os.environ["VECLIB_MAXIMUM_THREADS"] = "1"

modelcraft/geometry.py CHANGED Viewed

@@ -1,32 +1,44 @@
-import math
-import os
+import collections
 import gemmi
-from modelcraft.monlib import in_library
+import numpy as np
+from .monlib import MonLib
+def per_residue_geometry_rmsz(
+    structure: gemmi.Structure, monlib: MonLib, model_index: int = 0
+) -> dict:
+    atom_zs = _atom_zs(structure, monlib, model_index)
+    rv = {}
+    for chain in structure[model_index]:
+        for residue in chain:
+            zs = np.concatenate([atom_zs.get(atom.serial, []) for atom in residue])
+            rmsz = np.sqrt(np.mean(np.square(zs))) if len(zs) > 0 else np.nan
+            rv[(chain.name, str(residue.seqid))] = rmsz
+    return rv
-def rmsz(structure: gemmi.Structure) -> float:
-    codes = [code for code in structure[0].get_all_residue_names() if in_library(code)]
-    monlib = gemmi.read_monomer_lib(os.environ["CLIBD_MON"], codes)
-    devnull = open(os.devnull, "w")
-    topology = gemmi.prepare_topology(structure, monlib, warnings=devnull)
-    num_of_squares = 0
-    sum_of_squares = 0.0
-    for bond in topology.bonds:
-        num_of_squares += 1
-        sum_of_squares += bond.calculate_z() ** 2
-    for angle in topology.angles:
-        num_of_squares += 1
-        sum_of_squares += angle.calculate_z() ** 2
-    for torsion in topology.torsions:
-        if torsion.restr.esd > 0:
-            num_of_squares += 1
-            sum_of_squares += torsion.calculate_z() ** 2
-    for plane in topology.planes:
+def _atom_zs(structure: gemmi.Structure, monlib: MonLib, model_index: int) -> dict:
+    structure.assign_serial_numbers()
+    topo = gemmi.prepare_topology(structure, monlib, model_index)
+    atom_zs = collections.defaultdict(list)
+    for bond in topo.bonds:
+        z = bond.calculate_z()
+        for atom in bond.atoms:
+            atom_zs[atom.serial].append(z)
+    for angle in topo.angles:
+        z = angle.calculate_z()
+        for atom in angle.atoms:
+            atom_zs[atom.serial].append(z)
+    for torsion in topo.torsions:
+        if torsion.restr.esd > 0:  # Some torsions are only restrained by planes
+            z = torsion.calculate_z()
+            for atom in torsion.atoms:
+                atom_zs[atom.serial].append(z)
+    for plane in topo.planes:
         best_plane = gemmi.find_best_plane(plane.atoms)
-        max_z = 0
         for atom in plane.atoms:
-            distance = gemmi.get_distance_from_plane(atom.pos, best_plane)
-            max_z = max(distance / plane.restr.esd, max_z)
-        num_of_squares += 1
-        sum_of_squares += max_z ** 2
-    return math.sqrt(sum_of_squares / num_of_squares)
+            z = gemmi.get_distance_from_plane(atom.pos, best_plane) / plane.restr.esd
+            atom_zs[atom.serial].append(z)
+    return atom_zs

modelcraft/job.py CHANGED Viewed

@@ -4,8 +4,9 @@ import shutil
 import subprocess
 import textwrap
 import time
 from .pipeline import Pipeline
-from .utils import random_id
+from .utils import puid
 class Job(abc.ABC):
@@ -22,13 +23,13 @@ class Job(abc.ABC):
         if self._exe_path is None:
             raise ValueError(f"Executable '{self._exe_name}' not found")
         if pipeline is None:
-            self._directory = f"job_{self._exe_name}_{random_id(length=20)}"
+            self._directory = f"job_{self._exe_name}_{puid(length=20)}"
         else:
             self._directory = pipeline.next_job_directory(self._exe_name)
             pipeline.report_job_start(self._exe_name)
         os.makedirs(self._directory, exist_ok=True)
         self._setup()
-        with open(self._path("script.sh"), "w") as stream:
+        with open(self._path("script.sh"), "w", encoding="utf-8") as stream:
             stream.write(self._script())
         os.chmod(self._path("script.sh"), 0o755)
         start_time = time.time()
@@ -56,8 +57,8 @@ class Job(abc.ABC):
         pass
     def _run_subprocess(self):
-        with open(self._path("stdout.txt"), "w") as out_stream:
-            with open(self._path("stderr.txt"), "w") as err_stream:
+        with open(self._path("stdout.txt"), "w", encoding="utf-8") as out_stream:
+            with open(self._path("stderr.txt"), "w", encoding="utf-8") as err_stream:
                 process = subprocess.Popen(
                     args=[self._exe_path] + self._args,
                     stdin=subprocess.PIPE if self._stdin else None,

modelcraft/jobs/acedrg.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import dataclasses
 import gemmi
 from ..job import Job

modelcraft/jobs/buccaneer.py CHANGED Viewed

@@ -1,11 +1,14 @@
 import dataclasses
 import os
 import xml.etree.ElementTree as ET
 import gemmi
-from ..contents import AsuContents, PolymerType, PROTEIN_CODES
+from ..contents import AsuContents
 from ..job import Job
 from ..reflections import DataItem, write_mtz
-from ..structure import consecutive_residues, read_structure, write_mmcif
+from ..sequence import PROTEIN_CODES, PolymerType
+from ..structure import read_structure, write_mmcif
 @dataclasses.dataclass
@@ -124,12 +127,27 @@ class Buccaneer(Job):
         )
-def _known_structure_ids(structure: gemmi.Structure) -> list:
+def _known_structure_ids(structure: gemmi.Structure):
     "Known structure IDs for ligands (but not modified residues) with a CA atom"
     protein_residue_names = set(PROTEIN_CODES.values()) | {"MSE", "UNK"}
     for chain in structure[0]:
-        for residues in consecutive_residues(chain):
+        for residues in _consecutive_residues(chain):
             if not any(res.name in protein_residue_names for res in residues):
                 for residue in residues:
                     if "CA" in residue:
                         yield f"/{chain.name}/{str(residue.seqid)}/*/:1.0"
+def _consecutive_residues(chain: gemmi.Chain):
+    "Iterate through lists of residues with consecutive seqnums (first conformer only)"
+    consecutive = []
+    last_seqnum = None
+    for residue in chain.first_conformer():
+        if last_seqnum is None or residue.seqid.num == last_seqnum + 1:
+            consecutive.append(residue)
+        else:
+            yield consecutive
+            consecutive = [residue]
+        last_seqnum = residue.seqid.num
+    if len(consecutive) > 0:
+        yield consecutive

modelcraft/jobs/comit.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import dataclasses
 import gemmi
 from ..job import Job
 from ..reflections import DataItem, write_mtz

modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl

modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl