PyPI - modelcraft - Versions diffs - 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl - Mend

modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

modelcraft/__init__.py +16 -31
modelcraft/__main__.py +0 -1
modelcraft/arguments.py +35 -7
modelcraft/combine.py +22 -41
modelcraft/contents.py +188 -164
modelcraft/environ.py +0 -7
modelcraft/geometry.py +39 -27
modelcraft/job.py +6 -5
modelcraft/jobs/acedrg.py +2 -0
modelcraft/jobs/buccaneer.py +22 -4
modelcraft/jobs/comit.py +2 -0
modelcraft/jobs/ctruncate.py +3 -1
modelcraft/jobs/emda.py +2 -0
modelcraft/jobs/findwaters.py +2 -0
modelcraft/jobs/freerflag.py +2 -0
modelcraft/jobs/libg.py +2 -0
modelcraft/jobs/molrep.py +2 -0
modelcraft/jobs/nautilus.py +28 -14
modelcraft/jobs/nucleofind.py +88 -0
modelcraft/jobs/parrot.py +13 -2
modelcraft/jobs/phasematch.py +2 -1
modelcraft/jobs/refmac.py +3 -1
modelcraft/jobs/servalcat.py +38 -4
modelcraft/jobs/sheetbend.py +2 -0
modelcraft/modelcraftem.py +49 -6
modelcraft/modelcraftxray.py +90 -42
modelcraft/monlib.py +55 -52
modelcraft/pdbe.py +54 -0
modelcraft/pipeline.py +1 -1
modelcraft/prune.py +69 -0
modelcraft/reflections.py +11 -1
modelcraft/scripts/contents.py +5 -215
modelcraft/scripts/copies.py +26 -17
modelcraft/scripts/modelcraft.py +1 -0
modelcraft/scripts/sidechains.py +141 -0
modelcraft/scripts/validate.py +81 -0
modelcraft/sequence.py +106 -0
modelcraft/solvent.py +42 -113
modelcraft/structure.py +64 -41
modelcraft/tests/ccp4/__init__.py +7 -11
modelcraft/tests/ccp4/test_acedrg.py +2 -0
modelcraft/tests/ccp4/test_arguments.py +3 -0
modelcraft/tests/ccp4/test_buccaneer.py +3 -2
modelcraft/tests/ccp4/test_cell.py +4 -1
modelcraft/tests/ccp4/test_comit.py +2 -0
modelcraft/tests/ccp4/test_contents.py +99 -17
modelcraft/tests/ccp4/test_copies.py +1 -0
modelcraft/tests/ccp4/test_ctruncate.py +2 -0
modelcraft/tests/ccp4/test_findwaters.py +2 -0
modelcraft/tests/ccp4/test_freerflag.py +2 -0
modelcraft/tests/ccp4/test_libg.py +1 -0
modelcraft/tests/ccp4/test_molrep.py +3 -0
modelcraft/tests/ccp4/test_monlib.py +75 -45
modelcraft/tests/ccp4/test_nautilus.py +5 -3
modelcraft/tests/ccp4/test_nucleofind.py +62 -0
modelcraft/tests/ccp4/test_parrot.py +3 -1
modelcraft/tests/ccp4/test_phasematch.py +2 -0
modelcraft/tests/ccp4/test_prune.py +17 -0
modelcraft/tests/ccp4/test_reflections.py +110 -1
modelcraft/tests/ccp4/test_refmac.py +3 -0
modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
modelcraft/tests/ccp4/test_servalcat.py +52 -0
modelcraft/tests/ccp4/test_sheetbend.py +4 -3
modelcraft/tests/ccp4/test_sidechains.py +25 -0
modelcraft/tests/ccp4/test_solvent.py +12 -26
modelcraft/tests/ccp4/test_structure.py +1 -0
modelcraft/tests/ccp4/test_validation.py +19 -0
modelcraft/tests/ccp4/test_xray.py +12 -6
modelcraft/tests/ccpem/test_em.py +3 -0
modelcraft/tests/ccpem/test_emda.py +2 -0
modelcraft/tests/ccpem/test_refmac.py +1 -0
modelcraft/tests/ccpem/test_servalcat.py +4 -3
modelcraft/utils.py +16 -4
modelcraft/validation.py +101 -0
modelcraft-6.0.0.dist-info/METADATA +76 -0
modelcraft-6.0.0.dist-info/RECORD +85 -0
{modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
{modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
modelcraft/coot/prune.py +0 -1085
modelcraft/coot/sidechains.py +0 -68
modelcraft/jobs/acorn.py +0 -114
modelcraft/jobs/coot.py +0 -104
modelcraft/tests/ccp4/test_coot.py +0 -29
modelcraft/tests/ccp4/test_geometry.py +0 -20
modelcraft/tests/unittests/__init__.py +0 -0
modelcraft/tests/unittests/test_reflections.py +0 -101
modelcraft-5.0.2.dist-info/LICENSE +0 -504
modelcraft-5.0.2.dist-info/METADATA +0 -48
modelcraft-5.0.2.dist-info/RECORD +0 -82
{modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0

modelcraft/solvent.py CHANGED Viewed

@@ -1,140 +1,69 @@
 import collections
 import dataclasses
-import functools
 import math
-import re
-import gemmi
-from .contents import AsuContents, Polymer, PolymerType
-from .monlib import chemcomp
-def solvent_fraction(contents: AsuContents, mtz: gemmi.Mtz) -> float:
-    volume = _contents_volume(contents)
-    asu_volume = mtz.cell.volume / len(mtz.spacegroup.operations())
-    copies = contents.copies or _guess_copies(contents, mtz)
-    return 1 - copies * volume / asu_volume
-@functools.lru_cache(maxsize=None)
-def _library_weight(code: str) -> float:
-    return sum(atom.el.weight for atom in chemcomp(code).atoms)
-@functools.lru_cache(maxsize=None)
-def _library_volume(code: str) -> float:
-    return sum(18 for atom in chemcomp(code).atoms if not atom.is_hydrogen())
-def _polymer_weight(polymer: Polymer) -> float:
-    codes = polymer.residue_codes(modified=False)
-    total = sum(_library_weight(code) for code in codes)
-    total -= _library_weight("HOH") * (len(codes) - 1)
-    return total
-def _polymer_volume(polymer: Polymer) -> float:
-    density = 1.35 if polymer.type == PolymerType.PROTEIN else 2.0
-    return _polymer_weight(polymer) / (density * 0.602214)
-def _smiles_volume(smiles: str) -> float:
-    atoms = re.findall(pattern="[A-Z][a-z]?", string=smiles)
-    return 18 * len(atoms)
+import gemmi
-def _contents_volume(contents: AsuContents) -> float:
-    return sum(
-        item.volume * item.stoichiometry for item in _volume_components(contents)
-    )
+from .contents import AsuContents
+from .monlib import MonLib
-@dataclasses.dataclass
-class _VolumeComponent:
-    description: str
-    stoichiometry: int
-    stoichiometry_assumed: bool
-    volume: float
-def _volume_components(contents: AsuContents):
-    for kind, polymers in (
-        ("Protein", contents.proteins),
-        ("RNA", contents.rnas),
-        ("DNA", contents.dnas),
-    ):
-        for polymer in polymers:
-            sequence = polymer.sequence
-            description = f"{kind} with {len(sequence)} residues: "
-            if len(sequence) > 9:
-                description += f"{sequence[:3]}...{sequence[-3:]}"
-            else:
-                description += f"{sequence:9}"
-            stoichiometry = polymer.stoichiometry or 1
-            stoichiometry_assumed = polymer.stoichiometry is None
-            volume = _polymer_volume(polymer)
-            yield _VolumeComponent(
-                description, stoichiometry, stoichiometry_assumed, volume
-            )
-    for carb in contents.carbs:
-        description = "Carb:"
-        stoichiometry = carb.stoichiometry or 1
-        stoichiometry_assumed = carb.stoichiometry is None
-        volume = 0
-        length = 0
-        for code, count in carb.codes.items():
-            description += f" {count}x{code}"
-            length += count
-            if code in contents.smiles:
-                volume += _smiles_volume(contents.smiles[code]) * count
-            else:
-                volume += _library_volume(code) * count
-        volume -= _library_volume("HOH") * length
-        yield _VolumeComponent(
-            description, stoichiometry, stoichiometry_assumed, volume
-        )
-    for ligand in contents.ligands:
-        description = "Ligand: " + ligand.code
-        stoichiometry = ligand.stoichiometry or 1
-        stoichiometry_assumed = ligand.stoichiometry is None
-        if ligand.code in contents.smiles:
-            volume = _smiles_volume(contents.smiles[ligand.code])
-        else:
-            volume = _library_volume(ligand.code)
-        yield _VolumeComponent(
-            description, stoichiometry, stoichiometry_assumed, volume
-        )
+def solvent_fraction(
+    contents: AsuContents,
+    cell: gemmi.UnitCell,
+    spacegroup: gemmi.SpaceGroup,
+    resolution: float,
+    monlib: MonLib = None,
+) -> float:
+    monlib = monlib or MonLib(contents.monomer_codes(), include_standard=True)
+    asu_volume = cell.volume / len(spacegroup.operations())
+    copies = contents.copies
+    if copies is None:
+        copies = _guess_copies(contents, cell, spacegroup, resolution, monlib)
+    return 1 - copies * contents.volume(monlib) / asu_volume
 @dataclasses.dataclass
-class _CopiesOption:
+class CopiesOption:
     copies: int
     solvent: float
     probability: float
-def _copies_options(contents: AsuContents, mtz: gemmi.Mtz) -> list:
+def copies_options(
+    contents: AsuContents,
+    cell: gemmi.UnitCell,
+    spacegroup: gemmi.SpaceGroup,
+    resolution: float,
+    monlib: MonLib,
+) -> list:
     options = []
     nucleic_acids = contents.rnas + contents.dnas
-    mwp = sum(_polymer_weight(p) * (p.stoichiometry or 1) for p in contents.proteins)
-    mwn = sum(_polymer_weight(n) * (n.stoichiometry or 1) for n in nucleic_acids)
-    asu_volume = mtz.cell.volume / len(mtz.spacegroup.operations())
-    contents_volume = _contents_volume(contents)
-    resolution = mtz.resolution_high()
+    mwp = sum(p.weight(monlib) * (p.stoichiometry or 1) for p in contents.proteins)
+    mwn = sum(n.weight(monlib) * (n.stoichiometry or 1) for n in nucleic_acids)
+    asu_volume = cell.volume / len(spacegroup.operations())
+    contents_volume = contents.volume(monlib)
     total_probability = 0
     for copies in range(1, 60):
         solvent = 1 - copies * contents_volume / asu_volume
-        probability = _probability(mwp, mwn, copies, asu_volume, resolution)
+        probability = _matthews_probability(mwp, mwn, copies, asu_volume, resolution)
         if solvent < 0:
             break
-        options.append(_CopiesOption(copies, solvent, probability))
+        options.append(CopiesOption(copies, solvent, probability))
         total_probability += probability
     for option in options:
         option.probability /= total_probability
     return options
-def _guess_copies(contents: AsuContents, mtz: gemmi.Mtz) -> int:
-    options = _copies_options(contents, mtz)
+def _guess_copies(
+    contents: AsuContents,
+    cell: gemmi.UnitCell,
+    spacegroup: gemmi.SpaceGroup,
+    resolution: float,
+    monlib: MonLib,
+) -> int:
+    options = copies_options(contents, cell, spacegroup, resolution, monlib)
     if len(options) == 0:
         raise ValueError("Contents are too big to fit into the asymmetric unit")
     chosen = max(options, key=lambda option: option.probability)
@@ -165,15 +94,15 @@ _MATTHEWS_PROBABILITY_SETTINGS = [
 ]
-def _probability(
+def _matthews_probability(
     protein_mw: float,
     nucleic_mw: float,
     copies: int,
     asu_volume: float,
     resolution: float,
 ) -> float:
-    total_mw = protein_mw + nucleic_mw
-    matt = asu_volume / (total_mw * copies)
+    total_mw = (protein_mw + nucleic_mw) * copies
+    matthews = asu_volume / total_mw
     if protein_mw > 0.9 * total_mw:
         for index in range(12):
             if resolution < _MATTHEWS_PROBABILITY_SETTINGS[index].rbin:
@@ -183,5 +112,5 @@ def _probability(
     else:
         index = 14
     _, p0, vmbar, w, a, s = _MATTHEWS_PROBABILITY_SETTINGS[index]
-    z = (matt - vmbar) / w
+    z = (matthews - vmbar) / w
     return p0 + a * (math.exp(-math.exp(-z) - z * s + 1))

modelcraft/structure.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from typing import Iterator
 import gemmi
-from .monlib import atom_ids, in_library, is_protein, is_nucleic
+from .monlib import MonLib
 def read_structure(path: str) -> gemmi.Structure:
@@ -10,25 +12,11 @@ def read_structure(path: str) -> gemmi.Structure:
     # TODO: Currently altconfs appear in CIF auth_atom_id after sheetbend
     # TODO: Keep alternative conformations after problem is fixed
     structure.remove_alternative_conformations()
+    _remove_point_mutations(structure)
     _patch_names(structure)
     return structure
-def consecutive_residues(chain: gemmi.Chain):
-    "Iterate through lists of residues with consecutive seqnums (first conformer only)"
-    consecutive = []
-    last_seqnum = None
-    for residue in chain.first_conformer():
-        if last_seqnum is None or residue.seqid.num == last_seqnum + 1:
-            consecutive.append(residue)
-        else:
-            yield consecutive
-            consecutive = [residue]
-        last_seqnum = residue.seqid.num
-    if len(consecutive) > 0:
-        yield consecutive
 def contains_residue(structure: gemmi.Structure, name: str) -> bool:
     return any(residue.name == name for residue in _residues(structure))
@@ -42,15 +30,6 @@ def remove_residues(structure: gemmi.Structure, names) -> None:
     structure.remove_empty_chains()
-def remove_non_library_atoms(structure: gemmi.Structure) -> None:
-    for residue in _residues(structure):
-        if in_library(residue.name):
-            for i, atom in reversed(list(enumerate(residue))):
-                if atom.name not in atom_ids(residue.name):
-                    del residue[i]
-    structure.remove_empty_chains()
 def remove_non_protein(structure: gemmi.Structure) -> None:
     for model in structure:
         for chain in model:
@@ -68,13 +47,15 @@ def write_mmcif(path: str, structure: gemmi.Structure) -> None:
 class ModelStats:
-    def __init__(self, structure: gemmi.Structure):
+    def __init__(self, structure: gemmi.Structure, monlib: MonLib = None):
         self.residues: int = 0
         self.protein: int = 0
         self.nucleic: int = 0
         self.waters: int = 0
         self.dummy_atoms: int = 0
+        monlib = monlib or MonLib(structure[0].get_all_residue_names())
         for residue in _residues(structure):
             if residue.name == "HOH":
                 self.waters += 1
@@ -82,24 +63,11 @@ class ModelStats:
                 self.dummy_atoms += 1
             else:
                 self.residues += 1
-                if is_protein(residue.name):
+                if monlib.is_protein(residue.name):
                     self.protein += 1
-                if is_nucleic(residue.name):
+                if monlib.is_nucleic(residue.name):
                     self.nucleic += 1
-    def __eq__(self, other):
-        if isinstance(other, ModelStats):
-            return (
-                self.residues == other.residues
-                and self.waters == other.waters
-                and self.dummy_atoms == other.dummy_atoms
-            )
-        return NotImplemented
-    def __ne__(self, other):
-        equal = self.__eq__(other)
-        return NotImplemented if equal is not NotImplemented else not equal
 def _residues(structure: gemmi.Structure) -> Iterator[gemmi.Residue]:
     for model in structure:
@@ -108,6 +76,19 @@ def _residues(structure: gemmi.Structure) -> Iterator[gemmi.Residue]:
                 yield residue
+def _remove_point_mutations(structure: gemmi.Structure) -> None:
+    for model in structure:
+        to_remove = []
+        for chain in model:
+            for group in chain.whole().residue_groups():
+                for i in range(1, len(group)):
+                    residue = group[i]
+                    key = (chain.name, str(residue.seqid), residue.name)
+                    to_remove.append(key)
+        for chain_name, residue_seqid, residue_name in to_remove:
+            del model[chain_name][residue_seqid][residue_name]
 def _patch_names(structure: gemmi.Structure) -> None:
     residue_patches = {"SUL": "SO4"}
     atom_patches = {("HOH", "O1"): "O"}
@@ -117,3 +98,45 @@ def _patch_names(structure: gemmi.Structure) -> None:
         for atom in residue:
             atom.name = atom.name.strip()
             atom.name = atom_patches.get((residue.name, atom.name), atom.name)
+def _are_connected(
+    residue1: gemmi.Residue, residue2: gemmi.Residue, monlib: MonLib
+) -> bool:
+    if (
+        monlib.is_protein(residue1.name)
+        and monlib.is_protein(residue2.name)
+        and "C" in residue1
+        and "N" in residue2
+    ):
+        for atom1 in residue1["C"]:
+            for atom2 in residue2["N"]:
+                if atom1.pos.dist(atom2.pos) < 2.5:
+                    return True
+    if (
+        monlib.is_nucleic(residue1.name)
+        and monlib.is_nucleic(residue2.name)
+        and "O3'" in residue1
+        and "P" in residue2
+    ):
+        for atom1 in residue1["O3'"]:
+            for atom2 in residue2["P"]:
+                if atom1.pos.dist(atom2.pos) < 2.5:
+                    return True
+    return False
+def remove_isolated_fragments(chain: gemmi.Chain, monlib: MonLib, max_length: int):
+    to_remove = []
+    fragment = []
+    for i, residue in enumerate(chain):
+        if i > 0 and _are_connected(chain[i - 1], residue, monlib):
+            fragment.append(i)
+        else:
+            if len(fragment) <= max_length:
+                to_remove.extend(fragment)
+            fragment = [i]
+    if len(fragment) <= max_length:
+        to_remove.extend(fragment)
+    for i in reversed(to_remove):
+        del chain[i]

modelcraft/tests/ccp4/__init__.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import functools
 import os
 import shutil
-import uuid
 import urllib.request
+import uuid
 import gemmi
 from modelcraft.contents import AsuContents, Ligand, Polymer, PolymerType
 from modelcraft.jobs.refmac import Refmac
 from modelcraft.reflections import DataItem
@@ -16,7 +18,7 @@ def ccp4_path(*paths: str) -> str:
 def in_temp_directory(func):
     def wrapper():
-        tmp_dir = "tmp%s" % uuid.uuid4()
+        tmp_dir = f"tmp{uuid.uuid4()}"
         os.mkdir(tmp_dir)
         os.chdir(tmp_dir)
         try:
@@ -66,7 +68,6 @@ def insulin_refmac():
 @functools.lru_cache(maxsize=None)
 def insulin_contents():
-    contents = AsuContents()
     chain_a = Polymer(
         sequence="GIVEQCCASVCSLYQLENYCN",
         polymer_type=PolymerType.PROTEIN,
@@ -75,9 +76,8 @@ def insulin_contents():
         sequence="FVNQHLCGSHLVEALYLVCGERGFFYTPKA",
         polymer_type=PolymerType.PROTEIN,
     )
-    contents.add_polymer(chain_a)
-    contents.add_polymer(chain_b)
-    return contents
+    ligand = Ligand("GOL")
+    return AsuContents(proteins=[chain_a, chain_b], ligands=[ligand])
 @functools.lru_cache(maxsize=None)
@@ -92,8 +92,4 @@ def pdb1rxf_contents():
     )
     protein = Polymer(sequence=sequence, polymer_type=PolymerType.PROTEIN)
     ligand = Ligand(code="FE")
-    contents = AsuContents()
-    contents.proteins.append(protein)
-    contents.ligands.append(ligand)
-    contents.copies = 1
-    return contents
+    return AsuContents(copies=1, proteins=[protein], ligands=[ligand])

modelcraft/tests/ccp4/test_acedrg.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import math
 import os
 import gemmi
 from modelcraft.jobs.acedrg import Acedrg

modelcraft/tests/ccp4/test_arguments.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import subprocess
 import pytest
 from modelcraft.arguments import parse
 from . import ccp4_path, in_temp_directory, pdbe_download

modelcraft/tests/ccp4/test_buccaneer.py CHANGED Viewed

@@ -1,11 +1,12 @@
 from modelcraft.jobs.buccaneer import Buccaneer, _known_structure_ids
 from modelcraft.structure import ModelStats, read_structure
 from . import (
     in_temp_directory,
-    insulin_fsigf,
+    insulin_contents,
     insulin_freer,
+    insulin_fsigf,
     insulin_refmac,
-    insulin_contents,
     pdbe_download,
 )

modelcraft/tests/ccp4/test_cell.py CHANGED Viewed

@@ -1,13 +1,16 @@
 import urllib.request
 import gemmi
 from modelcraft.cell import max_distortion, remove_scale, update_cell
 from modelcraft.structure import read_structure
 from . import in_temp_directory
 @in_temp_directory
 def test_1ana():
-    url = "https://files-versioned.wwpdb.org/pdb_versioned/data/entries/"
+    url = "https://ftp.ebi.ac.uk/pub/databases/pdb_versioned/data/entries/"
     url += "an/pdb_00001ana/pdb_00001ana_xyz_v1-2.cif.gz"
     urllib.request.urlretrieve(url, "1ana.cif.gz")
     structure = read_structure("1ana.cif.gz")

modelcraft/tests/ccp4/test_comit.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import gemmi
 from modelcraft.jobs.comit import Comit
 from modelcraft.reflections import DataItem
 from . import ccp4_path

modelcraft/tests/ccp4/test_contents.py CHANGED Viewed

@@ -1,8 +1,11 @@
-from modelcraft.scripts.contents import _entry_contents, _smiles
+from pytest import approx
+from modelcraft.contents import AsuContents, Polymer, PolymerType
+from modelcraft.monlib import MonLib
 def _test_contents(entry: str, expected_json: list, selenomet: bool):
-    contents = _entry_contents(entry)
+    contents = AsuContents.from_pdbe(entry)
     assert contents.to_json() == expected_json
     assert contents.is_selenomet() == selenomet
     return contents
@@ -13,7 +16,10 @@ def test_1o6a():
         "copies": 2,
         "proteins": [
             {
-                "sequence": "SETRKTEVPSDKLELLLDIPLKVTVELGRTRMTLKRVLEMIHGSIIELDKLTGEPVDILVNGKLIARGEVVVIDENFGVRITEIVSPKERLELLNE",
+                "sequence": (
+                    "SETRKTEVPSDKLELLLDIPLKVTVELGRTRMTLKRVLEMIHGSIIELDKLTGEPVDILV"
+                    "NGKLIARGEVVVIDENFGVRITEIVSPKERLELLNE"
+                ),
                 "stoichiometry": 1,
                 "modifications": ["M->MSE"],
             }
@@ -23,7 +29,6 @@ def test_1o6a():
         "carbs": [],
         "ligands": [],
         "buffers": [],
-        "smiles": {},
     }
     _test_contents("1o6a", expected, selenomet=True)
@@ -34,7 +39,11 @@ def test_4gxy():
         "proteins": [],
         "rnas": [
             {
-                "sequence": "GGCGGCAGGUGCUCCCGACCCUGCGGUCGGGAGUUAAAAGGGAAGCCGGUGCAAGUCCGGCACGGUCCCGCCACUGUGACGGGGAGUCGCCCCUCGGGAUGUGCCACUGGCCCGAAGGCCGGGAAGGCGGAGGGGCGGCGAGGAUCCGGAGUCAGGAAACCUGCCUGCCGUC",
+                "sequence": (
+                    "GGCGGCAGGUGCUCCCGACCCUGCGGUCGGGAGUUAAAAGGGAAGCCGGUGCAAGUCCGG"
+                    "CACGGUCCCGCCACUGUGACGGGGAGUCGCCCCUCGGGAUGUGCCACUGGCCCGAAGGCC"
+                    "GGGAAGGCGGAGGGGCGGCGAGGAUCCGGAGUCAGGAAACCUGCCUGCCGUC"
+                ),
                 "stoichiometry": 1,
                 "modifications": ["1->GTP", "172->CCC"],
             }
@@ -46,7 +55,6 @@ def test_4gxy():
             {"code": "IRI", "stoichiometry": 7},
         ],
         "buffers": ["MG"],
-        "smiles": {},
     }
     _test_contents("4gxy", expected, selenomet=False)
@@ -56,7 +64,24 @@ def test_6as7():
         "copies": 1,
         "proteins": [
             {
-                "sequence": "DEEQVFHFYWLDAYEDQYNQPGVVFLFGKVWIESAETHVSCCVMVKNIERTLYFLPREMKIDLNTGKETGTPISMKDVYEEFDEKIATKYKIMKFKSKPVEKNYAFEIPDVPEKSEYLEVKYSAEMPQLPQDLKGETFSHVFGTNTSSLELFLMNRKIKGPCWLEVKSPQLLNQPVSWCKAEAMALKPDLVNVIKDVSPPPLVVMAFSMKTMQNAKNHQNEIIAMAALVHHSFALDKAAPKPPFQSHFCVVSKPKDCIFPYAFKEVIEKKNVKVEVAATERTLLGFFLAKVHKIDPDIIVGHNIYGFELEVLLQRINVCKAPHWSKIGRLKRSNMPKLGGRSGFGERNATCGRMICDVEISAKELIRCKSYHLSELVQQILKTERVVIPMENIQNMYSESSQLLYLLEHTWKDAKFILQIMCELNVLPLALQITNIAGNIMSRTLMGGRSERNEFLLLHAFYENNYIVPDKQIFRKPQQKLGDEDEEIDGDTNKYKKGRKKAAYAGGLVLDPKVGFYDKFILLLDFNSLYPSIIQEFNICFTTVQRVASEAQKVTEDGEQEQIPELPDPSLEMGILPREIRKLVERRKQVKQLMKQQDLNPDLILQYDIRQKALKLTANSMYGCLGFSYSRFYAKPLAALVTYKGREILMHTKEMVQKMNLEVIYGDTDSIMINTNSTNLEEVFKLGNKVKSEVNKLYKLLEIDIDGVFKSLLLLKKKKYAALVVEPTSDGNYVTKQELKGLDIVRRDWCDLAKDTGNFVIGQILSDQSRDTIVENIQKRLIEIGENVLNGSVPVSQFEINKALTKDPQDYPDKKSLPHVHVALWINSQGGRKVKAGDTVSYVICQDGSNLTASQRAYAPEQLQKQDNLTIDTQYYLAQQIHPVVARICEPIDGIDAVLIATWLGLDPTQFRVHHYHKDEEN",
+                "sequence": (
+                    "DEEQVFHFYWLDAYEDQYNQPGVVFLFGKVWIESAETHVSCCVMVKNIERTLYFLPREMK"
+                    "IDLNTGKETGTPISMKDVYEEFDEKIATKYKIMKFKSKPVEKNYAFEIPDVPEKSEYLEV"
+                    "KYSAEMPQLPQDLKGETFSHVFGTNTSSLELFLMNRKIKGPCWLEVKSPQLLNQPVSWCK"
+                    "AEAMALKPDLVNVIKDVSPPPLVVMAFSMKTMQNAKNHQNEIIAMAALVHHSFALDKAAPK"
+                    "PPFQSHFCVVSKPKDCIFPYAFKEVIEKKNVKVEVAATERTLLGFFLAKVHKIDPDIIVGH"
+                    "NIYGFELEVLLQRINVCKAPHWSKIGRLKRSNMPKLGGRSGFGERNATCGRMICDVEISAK"
+                    "ELIRCKSYHLSELVQQILKTERVVIPMENIQNMYSESSQLLYLLEHTWKDAKFILQIMCEL"
+                    "NVLPLALQITNIAGNIMSRTLMGGRSERNEFLLLHAFYENNYIVPDKQIFRKPQQKLGDED"
+                    "EEIDGDTNKYKKGRKKAAYAGGLVLDPKVGFYDKFILLLDFNSLYPSIIQEFNICFTTVQR"
+                    "VASEAQKVTEDGEQEQIPELPDPSLEMGILPREIRKLVERRKQVKQLMKQQDLNPDLILQY"
+                    "DIRQKALKLTANSMYGCLGFSYSRFYAKPLAALVTYKGREILMHTKEMVQKMNLEVIYGDT"
+                    "DSIMINTNSTNLEEVFKLGNKVKSEVNKLYKLLEIDIDGVFKSLLLLKKKKYAALVVEPTS"
+                    "DGNYVTKQELKGLDIVRRDWCDLAKDTGNFVIGQILSDQSRDTIVENIQKRLIEIGENVLN"
+                    "GSVPVSQFEINKALTKDPQDYPDKKSLPHVHVALWINSQGGRKVKAGDTVSYVICQDGSNL"
+                    "TASQRAYAPEQLQKQDNLTIDTQYYLAQQIHPVVARICEPIDGIDAVLIATWLGLDPTQFR"
+                    "VHHYHKDEEN"
+                ),
                 "stoichiometry": 1,
                 "modifications": [],
             }
@@ -77,7 +102,6 @@ def test_6as7():
         "carbs": [],
         "ligands": [{"code": "DCP", "stoichiometry": 1}],
         "buffers": ["MG", "CO"],
-        "smiles": {},
     }
     _test_contents("6as7", expected, selenomet=False)
@@ -87,7 +111,17 @@ def test_4aqd():
         "copies": 1,
         "proteins": [
             {
-                "sequence": "RSEDDIIIATKNGKVRGMNLTVFGGTVTAFLGIPYAQPPLGRLRFKKPQSLTKWSDIWNATKYANSCCQNIDQSFPGFHGSEMWNPNTDLSEDCLYLNVWIPAPKPKNATVLIWIYGGGFQTGTSSLHVYDGKFLARVERVIVVSMNYRVGALGFLALPGNPEAPGNMGLFDQQLALQWVQKNIAAFGGNPKSVTLFGESAGAASVSLHLLSPGSHSLFTRAILQSGSFNAPWAVTSLYEARNRTLNLAKLTGCSRENETEIIKCLRNKDPQEILLNEAFVVPYGTPLSVNFGPTVDGDFLTDMPDILLELGQFKKTQILVGVNKDEGTAFLVYGAPGFSKDNNSIITRKEFQEGLKIFFPGVSEFGKESILFHYTDWVDDQRPENYREALGDVVGDYNFICPALEFTKKFSEWGNNAFFYYFEHRSSKLPWPEWMGVMHGYEIEFVFGLPLERRDNYTKAEEILSRSIVKRWANFAKYGNPNETQNNSTSWPVFKSTEQKYLTLNTESTRIMTKLRAQQCRFWTSFFPKV",
+                "sequence": (
+                    "RSEDDIIIATKNGKVRGMNLTVFGGTVTAFLGIPYAQPPLGRLRFKKPQSLTKWSDIWNA"
+                    "TKYANSCCQNIDQSFPGFHGSEMWNPNTDLSEDCLYLNVWIPAPKPKNATVLIWIYGGGF"
+                    "QTGTSSLHVYDGKFLARVERVIVVSMNYRVGALGFLALPGNPEAPGNMGLFDQQLALQWV"
+                    "QKNIAAFGGNPKSVTLFGESAGAASVSLHLLSPGSHSLFTRAILQSGSFNAPWAVTSLYE"
+                    "ARNRTLNLAKLTGCSRENETEIIKCLRNKDPQEILLNEAFVVPYGTPLSVNFGPTVDGDF"
+                    "LTDMPDILLELGQFKKTQILVGVNKDEGTAFLVYGAPGFSKDNNSIITRKEFQEGLKIFF"
+                    "PGVSEFGKESILFHYTDWVDDQRPENYREALGDVVGDYNFICPALEFTKKFSEWGNNAFF"
+                    "YYFEHRSSKLPWPEWMGVMHGYEIEFVFGLPLERRDNYTKAEEILSRSIVKRWANFAKYG"
+                    "NPNETQNNSTSWPVFKSTEQKYLTLNTESTRIMTKLRAQQCRFWTSFFPKV"
+                ),
                 "stoichiometry": 2,
                 "modifications": [],
             }
@@ -105,8 +139,7 @@ def test_4aqd():
             {"code": "PG4", "stoichiometry": 2},
             {"code": "PEG", "stoichiometry": 2},
         ],
-        "buffers": ["EDO", "CL", "GLY"],
-        "smiles": {},
+        "buffers": ["EDO", "UNX", "CL", "GLY"],
     }
     _test_contents("4aqd", expected, selenomet=False)
@@ -116,7 +149,13 @@ def test_1vjr():
         "copies": 1,
         "proteins": [
             {
-                "sequence": "MGSDKIHHHHHHVLDKIELFILDMDGTFYLDDSLLPGSLEFLETLKEKNKRFVFFTNNSSLGAQDYVRKLRNMGVDVPDDAVVTSGEITAEHMLKRFGRCRIFLLGTPQLKKVFEAYGHVIDEENPDFVVLGFDKTLTYERLKKACILLRKGKFYIATHPDINCPSKEGPVPDAGSIMAAIEASTGRKPDLIAGKPNPLVVDVISEKFGVPKERMAMVGDRLYTDVKLGKNAGIVSILVLTGETTPEDLERAETKPDFVFKNLGELAKAVQ",
+                "sequence": (
+                    "MGSDKIHHHHHHVLDKIELFILDMDGTFYLDDSLLPGSLEFLETLKEKNKRFVFFTNNSS"
+                    "LGAQDYVRKLRNMGVDVPDDAVVTSGEITAEHMLKRFGRCRIFLLGTPQLKKVFEAYGHV"
+                    "IDEENPDFVVLGFDKTLTYERLKKACILLRKGKFYIATHPDINCPSKEGPVPDAGSIMAA"
+                    "IEASTGRKPDLIAGKPNPLVVDVISEKFGVPKERMAMVGDRLYTDVKLGKNAGIVSILVL"
+                    "TGETTPEDLERAETKPDFVFKNLGELAKAVQ"
+                ),
                 "stoichiometry": 1,
                 "modifications": ["M->MSE"],
             }
@@ -126,7 +165,6 @@ def test_1vjr():
         "carbs": [],
         "ligands": [],
         "buffers": ["NI", "CL"],
-        "smiles": {},
     }
     _test_contents("1vjr", expected, selenomet=True)
@@ -157,7 +195,6 @@ def test_1cag():
         "carbs": [],
         "ligands": [],
         "buffers": ["ACY"],
-        "smiles": {},
     }
     contents = _test_contents("1cag", expected, selenomet=False)
     polymer = contents.proteins[0]
@@ -182,10 +219,55 @@ def test_1iha():
         "carbs": [],
         "ligands": [{"code": "RHD", "stoichiometry": 1}],
         "buffers": ["CL"],
-        "smiles": {},
     }
     _test_contents("1iha", expected, selenomet=False)
-def test_0pr_smiles():
-    assert _smiles("0PR") == "Cc1c(c(c(cn1)COP(=O)(O)O)CN[C@@H](Cc2ccc(cc2)O)C(=O)O)O"
+def test_3ue7():
+    expected = {
+        "copies": 1,
+        "proteins": [
+            {
+                "sequence": "TTCCPSIVARSNFNACRLPGTPEALCATYTGCIIIPGATCPGDYAN",
+                "stoichiometry": 1,
+                "modifications": [
+                    "T->DTH",
+                    "C->DCY",
+                    "P->DPR",
+                    "S->DSN",
+                    "I->DIL",
+                    "V->DVA",
+                    "A->DAL",
+                    "R->DAR",
+                    "N->DSG",
+                    "F->DPN",
+                    "L->DLE",
+                    "E->DGL",
+                    "Y->DTY",
+                    "D->DAS",
+                ],
+            },
+            {
+                "sequence": "TTCCPSIVAKSNFNACRLPGTPEALCATYTGCIIIPGATCPGDYAN",
+                "stoichiometry": 1,
+                "modifications": [],
+            },
+        ],
+        "rnas": [],
+        "dnas": [],
+        "carbs": [],
+        "ligands": [],
+        "buffers": [],
+    }
+    _test_contents("3ue7", expected, selenomet=False)
+def test_5vz8():
+    contents = AsuContents.from_pdbe("5vz8")
+    contents.monomer_codes()
+def test_polymer_weight():
+    polymer = Polymer("GG", polymer_type=PolymerType.PROTEIN)
+    monlib = MonLib(["GLY"], include_standard=True)
+    assert polymer.weight(monlib) == approx(132.12, abs=0.01)

modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl

modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl