PyPI - gemmi-protools - Versions diffs - 0.1.16__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

gemmi-protools 0.1.16__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gemmi-protools might be problematic. Click here for more details.

Files changed (24) hide show

gemmi_protools/__init__.py +1 -4
gemmi_protools/io/convert.py +0 -3
gemmi_protools/io/reader.py +749 -310
gemmi_protools/{utils → tools}/align.py +38 -55
gemmi_protools/tools/dockq.py +127 -0
gemmi_protools/tools/mesh.py +95 -0
gemmi_protools/{utils → tools}/pdb_annot.py +21 -106
gemmi_protools-1.0.0.dist-info/METADATA +41 -0
gemmi_protools-1.0.0.dist-info/RECORD +19 -0
gemmi_protools/io/cif_opts.py +0 -173
gemmi_protools/io/parse_pdb_header.py +0 -387
gemmi_protools/io/parser.py +0 -292
gemmi_protools/io/pdb_opts.py +0 -179
gemmi_protools/io/peptide.py +0 -32
gemmi_protools/io/struct_info.py +0 -91
gemmi_protools/utils/dockq.py +0 -139
gemmi_protools/utils/fixer.py +0 -274
gemmi_protools/utils/ppi.py +0 -74
gemmi_protools-0.1.16.dist-info/METADATA +0 -29
gemmi_protools-0.1.16.dist-info/RECORD +0 -26
/gemmi_protools/{utils → tools}/__init__.py +0 -0
{gemmi_protools-0.1.16.dist-info → gemmi_protools-1.0.0.dist-info}/WHEEL +0 -0
{gemmi_protools-0.1.16.dist-info → gemmi_protools-1.0.0.dist-info}/licenses/LICENSE +0 -0
{gemmi_protools-0.1.16.dist-info → gemmi_protools-1.0.0.dist-info}/top_level.txt +0 -0

gemmi_protools/io/parser.py DELETED Viewed

@@ -1,292 +0,0 @@
-"""
-@Author: Luo Jiejian
-"""
-import pathlib
-from collections import Counter
-from typing import Union, Optional, Dict, List
-import gemmi
-from typeguard import typechecked
-from gemmi_protools.io.cif_opts import _cif_entity_info, _is_cif, _get_cif_resolution
-from gemmi_protools.io.pdb_opts import _pdb_entity_info, _is_pdb, _get_pdb_resolution
-from gemmi_protools.io.struct_info import Entity
-@typechecked
-def _ent_from_structure(struct: gemmi.Structure) -> Entity:
-    """
-    Build the entity information of a structure from its generated mmCIF block.
-    Run struct.setup_entities() in advance.
-    Every entity of the structure that has no description yet gets its own
-    name as description.
-    :param struct: structure whose entities were already set up
-    :return: the value returned by _cif_entity_info, with eid2desc completed
-    """
-    ent_info = _cif_entity_info(struct.make_mmcif_block())
-    descriptions = ent_info["eid2desc"]
-    for entity in struct.entities:
-        # keep an existing description, fall back to the entity name otherwise
-        descriptions.setdefault(entity.name, entity.name)
-    return ent_info
-@typechecked
-def cif_parser(path: Union[str, pathlib.Path]):
-    """
-    Parse a .cif or .cif.gz file.
-    :param path: path of the mmCIF file
-    :return: (gemmi.Structure, entity)
-    :raises ValueError: when _is_cif rejects the path
-    """
-    if not _is_cif(path):
-        raise ValueError("Only support .cif or .cif.gz file, but got %s" % path)
-    document = gemmi.cif.read(str(path))
-    first_block = document.sole_block()
-    struct = gemmi.read_structure(str(path))
-    struct.setup_entities()
-    # A sheet_id such as 1' triggers strange parsing errors that leave sheets
-    # with 0 strands. Delete them, walking backwards so that the indexes
-    # still to be visited stay valid.
-    for idx in reversed(range(len(struct.sheets))):
-        if len(struct.sheets[idx].strands) == 0:
-            del struct.sheets[idx]
-    # gemmi fails to parse the right resolution, take it from the block
-    struct.resolution = _get_cif_resolution(first_block)
-    # entity information read from the document, completed with the entries
-    # that are only found through the structure
-    from_doc = _cif_entity_info(first_block)
-    from_struct = _ent_from_structure(struct)
-    for super_key in ("eid2desc", "polymer2eid"):
-        target = from_doc[super_key]
-        for key, val in from_struct[super_key].items():
-            target.setdefault(key, val)
-    return struct, from_doc
-@typechecked
-def _assign_digital_entity_names(structure: gemmi.Structure) -> Optional[Dict[str, str]]:
-    """
-    Rename the entities to "1", "2", ... unless every name is already made of digits.
-    Run structure.setup_entities() in advance.
-    :param structure: structure whose entities are renamed in place
-    :return: {old_entity_name: new_entity_name}, or None when nothing was renamed
-    """
-    if all(ent.name.isdigit() for ent in structure.entities):
-        return None
-    renamed = dict()
-    for position, ent in enumerate(structure.entities, start=1):
-        new_name = str(position)
-        renamed[ent.name] = new_name
-        ent.name = new_name
-    return renamed
-@typechecked
-def _update_entity_names(entity: Entity, mapper: Dict[str, str]):
-    """
-    Replace, in place, the old entity names by the new ones: they are the keys of
-    eid2desc, eid2specie, eid2taxid and the values of polymer2eid.
-    :param entity:
-    :param mapper: {old_entity_name: new_entity_name}
-    :return:
-    :raises KeyError: when an entity name is missing from mapper
-    """
-    for super_key in ('eid2desc', 'eid2specie', 'eid2taxid'):
-        renamed = {mapper[old_eid]: val for old_eid, val in entity[super_key].items()}
-        setattr(entity, super_key, renamed)
-    setattr(entity, "polymer2eid",
-            {chain: mapper[old_eid] for chain, old_eid in entity.polymer2eid.items()})
-def _melt_dict(inputs: dict):
-    """
-    Split every comma separated key and give each part the value of the whole key.
-    :param inputs: dict whose keys are strings such as "A,B"
-    :return: dict with one entry per part; a later key overrides an earlier one
-    """
-    return {part: val for joined, val in inputs.items() for part in joined.split(",")}
-@typechecked
-def pdb_parser(path: Union[str, pathlib.Path]):
-    """
-    Parse a .pdb or .pdb.gz file.
-    :param path: path of the PDB file
-    :return: (gemmi.Structure, entity)
-    :raises ValueError: when _is_pdb rejects the path
-    """
-    if not _is_pdb(path):
-        raise ValueError("Only support .pdb or .pdb.gz file, but got %s" % path)
-    struct = gemmi.read_structure(str(path))
-    struct.resolution = _get_pdb_resolution(struct.raw_remarks)
-    # header information is keyed by comma joined chain names: index it by chain
-    ent_0 = _pdb_entity_info(path)
-    ch2desc = _melt_dict(ent_0.eid2desc)
-    ch2specie = _melt_dict(ent_0.eid2specie)
-    ch2taxid = _melt_dict(ent_0.eid2taxid)
-    struct.setup_entities()
-    block = struct.make_mmcif_block()
-    ent_t = _cif_entity_info(block)
-    # set non-polymer entity names
-    non_polymer_entities = [e.name for e in struct.entities if e.polymer_type.name == "Unknown"]
-    for k in non_polymer_entities:
-        assert k in ent_t.eid2desc
-        if ent_t.eid2desc[k] == "?":
-            ent_t.eid2desc[k] = k
-    # the other entities take the description found in the header, looked up
-    # with the entity name as chain name
-    for k in ent_t.eid2desc.keys():
-        if k not in non_polymer_entities:
-            ent_t.eid2desc[k] = ch2desc.get(k, "?")
-    polymer_chs_used_as_eid = set(ch2specie.keys()).intersection(ent_t.eid2desc.keys())
-    for k in polymer_chs_used_as_eid:
-        ent_t.eid2specie[k] = ch2specie.get(k, "?")
-        ent_t.eid2taxid[k] = ch2taxid.get(k, "?")
-    m = _assign_digital_entity_names(struct)
-    # None means that no entity was renamed (all names are already numbers,
-    # or there is no entity): the tables are then already consistent, and
-    # _update_entity_names only accepts a real mapper
-    if m is not None:
-        _update_entity_names(ent_t, m)
-    return struct, ent_t
-@typechecked
-def _chain_type(structure: gemmi.Structure, chain_id: str) -> str:
-    """
-    Classify a chain as "protein", "dna", "rna" or "other" from the polymer types of its subchains.
-    Subchains of "Unknown" polymer type are ignored; a chain whose remaining subchains
-    do not share exactly one polymer type is "other".
-    When several models hold the chain, the last one decides.
-    :param structure:
-    :param chain_id: name of the chain
-    :return: "protein", "dna", "rna" or "other"
-    :raises RuntimeError: when no model holds a chain named chain_id
-    """
-    labels = {"PeptideL": "protein",
-              "Dna": "dna",
-              "Rna": "rna"}
-    polymer_type = None
-    for model in structure:
-        for cur_chain in model:
-            if cur_chain.name != chain_id:
-                continue
-            found = {sc.check_polymer_type().name for sc in cur_chain.subchains()}
-            found.discard("Unknown")
-            polymer_type = found.pop() if len(found) == 1 else "Unknown"
-    if polymer_type is None:
-        raise RuntimeError("chain_id %s not in structure" % chain_id)
-    return labels.get(polymer_type, "other")
-@typechecked
-def _get_model_chain_names(model: gemmi.Model) -> List[str]:
-    """
-    List the chain names of a model, in iteration order and with repetitions.
-    :param model:
-    :return: chain names
-    """
-    return [chain.name for chain in model]
-@typechecked
-def _assert_unique_chain_names_in_models(structure: gemmi.Structure):
-    """
-    Check that no model of the structure uses the same chain name twice.
-    :param structure:
-    :raises RuntimeError: for the first model with duplicated chain names
-    """
-    for model in structure:
-        occurrences = Counter(_get_model_chain_names(model))
-        repeated = [name for name, count in occurrences.items() if count > 1]
-        if not repeated:
-            continue
-        raise RuntimeError("Duplicate chain names in model %d: %s" % (model.num, ",".join(repeated)))
-@typechecked
-def _chain_names2one_letter(structure: gemmi.Structure, only_uppercase: bool = True) -> Dict[str, str]:
-    """
-    Generate a one letter name for every chain whose name is not an allowed single character.
-    (1) when only_uppercase is True, the allowed characters are the 26 uppercase letters
-    (2) when only_uppercase is False, they are the uppercase letters, lowercase letters and digits (62)
-    Chains already named with an allowed character keep it and are left out of the result.
-    If there are too many chains, make some splits or assemblies first,
-    or just keep the longer chain names in .cif format.
-    PDB only supports single letter chain names.
-    :param structure: structure with a single model and unique chain names
-    :param only_uppercase: restrict the names to uppercase letters
-    :return: {old_chain_name: new_one_letter_name}
-    :raises RuntimeError: more than one model, duplicated chain names,
-        or more chains than allowed characters
-    """
-    if len(structure) > 1:
-        raise RuntimeError("> 1 models in structure, do nothing")
-    _assert_unique_chain_names_in_models(structure)
-    n_chains = len(structure[0])
-    # candidates are kept in reverse order so that .pop() hands out A, B, C, ... first
-    candidates = list("ZYXWVUTSRQPONMLKJIHGFEDCBA")
-    mode = "UPPERCASE"
-    if not only_uppercase:
-        candidates = list("9876543210" "zyxwvutsrqponmlkjihgfedcba") + candidates
-        mode = "UPPERCASE + LOWERCASE + DIGITAL"
-    if n_chains > len(candidates):
-        raise RuntimeError("Support max %d chains under %s mode, but got %d chains in structure"
-                           % (len(candidates), mode, n_chains))
-    chain_names = [chain.name for model in structure for chain in model]
-    in_use = [name for name in chain_names if name in candidates]
-    available = [letter for letter in candidates if letter not in in_use]
-    name_mapper = dict()
-    for name in chain_names:
-        if name not in candidates:
-            name_mapper[name] = available.pop()
-    return name_mapper
-@typechecked
-def get_assembly(structure: gemmi.Structure, assembly_name: str,
-                 how: gemmi.HowToNameCopiedChain = gemmi.HowToNameCopiedChain.AddNumber):
-    """
-    Build an assembly on a clone of the structure; the input is not modified.
-    :param structure:
-    :param assembly_name: name given to transform_to_assembly
-    :param how: how the copied chains are named
-    :return: (assembly structure, polymer2eid), where polymer2eid maps a chain name to the
-        entity name of the first PeptideL, Dna or Rna subchain of the chain
-    """
-    struct = structure.clone()
-    struct.transform_to_assembly(assembly_name, how)
-    # update ENTITY.polymer2eid
-    scn2eid = {scn: ent.name for ent in struct.entities for scn in ent.subchains}
-    polymer_types = ("PeptideL", "Dna", "Rna")
-    polymer2eid = dict()
-    for model in struct:
-        for chain in model:
-            polymers = (sc for sc in chain.subchains()
-                        if sc.check_polymer_type().name in polymer_types)
-            first_polymer = next(polymers, None)
-            if first_polymer is not None:
-                polymer2eid[chain.name] = scn2eid[first_polymer.subchain_id()]
-    return struct, polymer2eid

gemmi_protools/io/pdb_opts.py DELETED Viewed

@@ -1,179 +0,0 @@
-"""
-@Author: Luo Jiejian
-"""
-import gzip
-import io
-import pathlib
-import re
-from collections import defaultdict
-from typing import Dict, Union, List
-from typeguard import typechecked
-from gemmi_protools.io.parse_pdb_header import _parse_pdb_header_list
-from gemmi_protools.io.struct_info import Entity
-@typechecked
-def _molecule_information(header_dict: Dict) -> Entity:
-    """
-    Collect the description, species and taxonomy id of each compound of a parsed PDB header.
-    Entries are keyed by the sorted, comma joined chain names of the compound;
-    compounds without chain names are skipped.
-    :param header_dict: dict with "compound" and "source" entries that share the same indexes
-    :return: Entity with eid2desc, eid2specie, eid2taxid filled and an empty polymer2eid
-    """
-    descriptions, species, taxids = dict(), dict(), dict()
-    for idx, compound in header_dict["compound"].items():
-        if "chain" not in compound:
-            continue
-        chain = re.sub(r"\s+", "", compound["chain"])
-        if chain == "":
-            continue
-        key = ",".join(sorted(chain.split(",")))
-        # a compound without source record gets empty species and taxid
-        source = header_dict["source"][idx] if idx in header_dict["source"] else {}
-        descriptions[key] = compound.get("molecule", "")
-        species[key] = source.get("organism_scientific", "")
-        taxids[key] = source.get("organism_taxid", "")
-    return Entity(eid2desc=descriptions,
-                  eid2specie=species,
-                  eid2taxid=taxids,
-                  polymer2eid=dict())
-@typechecked
-def _is_pdb(path: Union[str, pathlib.Path]) -> bool:
-    """
-    Tell whether the file name ends with .pdb or .pdb.gz (only the name is inspected).
-    :param path:
-    :return: True for a .pdb or .pdb.gz name, False otherwise
-    """
-    suffixes = pathlib.Path(path).suffixes
-    if not suffixes:
-        return False
-    return suffixes[-1] == ".pdb" or "".join(suffixes[-2:]) == ".pdb.gz"
-# added by Ljj
-@typechecked
-def _pdb_entity_info(path: Union[str, pathlib.Path]) -> Entity:
-    """
-    Read the header of a .pdb or .pdb.gz file and extract its entity information.
-    The header is every line found before the first ATOM, HETATM or MODEL record.
-    :param path: path of the PDB file
-    :return: result of _molecule_information on the parsed header
-    :raises ValueError: when _is_pdb rejects the path
-    """
-    if not _is_pdb(path):
-        raise ValueError("Only support .pdb or .pdb.gz file, but got %s" % path)
-    if pathlib.Path(path).suffixes[-1] == ".pdb":
-        text_io = open(path, "r")
-    else:
-        text_io = gzip.open(path, "rt", encoding="utf-8")
-    header = []
-    with text_io:
-        # Stop reading at the first coordinate record. When the file has none,
-        # every line - the last one included - belongs to the header.
-        for line in text_io:
-            if line[0:6] in ("ATOM  ", "HETATM", "MODEL "):
-                break
-            header.append(line)
-    info = _parse_pdb_header_list(header)
-    return _molecule_information(info)
-@typechecked
-def _get_pdb_resolution(remark_lines: List[str]) -> float:
-    """
-    Extract the resolution from REMARK lines.
-    :param remark_lines: lines to search
-    :return: the smallest resolution value found, 0.0 when there is none
-    """
-    found = []
-    for line in remark_lines:
-        hit = re.search(r"REMARK.+RESOLUTION.+?([\d\.]+|NOT APPLICABLE)", line)
-        if hit is None:
-            continue
-        try:
-            found.append(float(hit.group(1)))
-        except (TypeError, ValueError):
-            # "NOT APPLICABLE" or a lone "." is not a number: skip the line
-            pass
-    return min(found, default=0.0)
-@typechecked
-def _compound_source_string(entity: Entity) -> List[str]:
-    """
-    Write the COMPND and SOURCE records of a PDB header from the entity tables.
-    Entities are numbered (MOL_ID) from 1 following the sorted entity names found in
-    polymer2eid; a value missing from eid2desc, eid2specie or eid2taxid is written as "?".
-    :param entity:
-    :return: all COMPND lines followed by all SOURCE lines
-    """
-    entity2polymer = defaultdict(list)
-    for chain_name, eid in entity["polymer2eid"].items():
-        entity2polymer[eid].append(chain_name)
-    values = []
-    for i, el in enumerate(sorted(entity2polymer)):
-        values.append(dict(mol_id=str(i + 1),
-                           chain=", ".join(sorted(entity2polymer[el])),
-                           molecule=entity["eid2desc"].get(el, "?"),
-                           organism_scientific=entity["eid2specie"].get(el, "?"),
-                           organism_taxid=entity["eid2taxid"].get(el, "?")
-                           )
-                      )
-    compound = _numbered_pdb_records("COMPND", values,
-                                     ["MOLECULE: {molecule};", "CHAIN: {chain};"])
-    source = _numbered_pdb_records("SOURCE", values,
-                                   ["ORGANISM_SCIENTIFIC: {organism_scientific};",
-                                    "ORGANISM_TAXID: {organism_taxid};"])
-    return compound + source
-def _numbered_pdb_records(tag: str, values: List[dict], templates: List[str]) -> List[str]:
-    """
-    Format one multi-line PDB record: a MOL_ID line followed by the templates, for each value.
-    The first line carries no continuation number, the following lines are numbered 2, 3, ...
-    (SOURCE lines used to be numbered from 1, unlike COMPND lines).
-    """
-    records = []
-    for val in values:
-        for template in ["MOL_ID: {mol_id};"] + templates:
-            text = template.format(**val)
-            n_line = len(records) + 1
-            if n_line == 1:
-                records.append("%s    %s" % (tag, text))
-            else:
-                records.append("%s %3d %s" % (tag, n_line, text))
-    return records

gemmi_protools/io/peptide.py DELETED Viewed

@@ -1,32 +0,0 @@
-"""
-@Author: Luo Jiejian
-"""
-from copy import deepcopy
-from Bio.PDB.Polypeptide import nucleic_letters_3to1_extended, protein_letters_3to1_extended
-def strip_key_val(inputs):
-    """
-    Return a new dict with the surrounding whitespace removed from every key and value.
-    """
-    return {key.strip(): val.strip() for key, val in inputs.items()}
-def __nucleic_3to1_mapper():
-    """
-    Copy Biopython's nucleic_letters_3to1_extended table, add "DN" and "N" (both "N"),
-    and strip the whitespace of its keys and values.
-    """
-    table = deepcopy(nucleic_letters_3to1_extended)
-    table.update({"DN": "N", "N": "N"})
-    return strip_key_val(table)
-def __protein_3to1_mapper():
-    """
-    Copy Biopython's protein_letters_3to1_extended table, add "UNK" as "X",
-    and strip the whitespace of its keys and values.
-    """
-    table = deepcopy(protein_letters_3to1_extended)
-    table.update({"UNK": "X"})
-    return strip_key_val(table)
-# Module-level residue name -> one letter tables, built once when the module is imported.
-nucleic_3to1_mapper = __nucleic_3to1_mapper()
-protein_3to1_mapper = __protein_3to1_mapper()

gemmi_protools/io/struct_info.py DELETED Viewed

@@ -1,91 +0,0 @@
-"""
-@Author: Luo Jiejian
-"""
-from dataclasses import dataclass, field
-from datetime import datetime
-from typing import Dict, Optional
-import gemmi
-from typeguard import typechecked
-@typechecked
-@dataclass
-class Entity:
-    """
-    Entity tables of a structure. The four tables can also be read
-    with entity["table_name"] and entity.get("table_name").
-    """
-    # entity name -> description
-    eid2desc: Dict[str, str] = field(default_factory=dict)
-    # entity name -> species
-    eid2specie: Dict[str, str] = field(default_factory=dict)
-    # entity name -> taxonomy id
-    eid2taxid: Dict[str, str] = field(default_factory=dict)
-    # polymer chain name -> entity name
-    polymer2eid: Dict[str, str] = field(default_factory=dict)
-    @typechecked
-    def __setattr__(self, name: str, value: Dict[str, str]):
-        # overridden so that the annotated types are checked on every assignment
-        super().__setattr__(name, value)
-    @typechecked
-    def update(self, inputs: Dict[str, Dict[str, str]]):
-        """
-        Replace the tables named by the keys of inputs; unknown names are ignored.
-        """
-        for key in inputs:
-            if hasattr(self, key):
-                setattr(self, key, inputs[key])
-    def get(self, name: str, default: Optional[str] = None):
-        """
-        Return the attribute called name, or default when there is none.
-        """
-        return getattr(self, name, default)
-    def __getitem__(self, name: str):
-        return getattr(self, name)
-    def keys(self):
-        """
-        Return the names of the instance attributes as a list.
-        """
-        return list(vars(self))
-@typechecked
-@dataclass
-class Info:
-    """
-    Header level information of a structure, stored as strings.
-    Assigning to a name the object does not have yet is silently ignored,
-    and deposition_date has to follow the YYYY-MM-DD format.
-    """
-    cell_Z: str = ""
-    pdb_id: str = ""
-    exp_method: str = ""
-    deposition_date: str = "1909-01-08"
-    title: str = ""
-    keywords: str = ""
-    keywords_text: str = ""
-    @property
-    def __attributes_mapper(self):
-        # attribute name -> mmCIF tag used as key of the gemmi.InfoMap
-        return {'cell_Z': '_cell.Z_PDB',
-                'pdb_id': '_entry.id',
-                'exp_method': '_exptl.method',
-                'deposition_date': '_pdbx_database_status.recvd_initial_deposition_date',
-                'title': '_struct.title',
-                'keywords': '_struct_keywords.pdbx_keywords',
-                'keywords_text': '_struct_keywords.text'}
-    def to_gemmi_structure_infomap(self) -> gemmi.InfoMap:
-        """
-        Export the attributes as a gemmi.InfoMap keyed by mmCIF tags.
-        Only string values longer than one character are exported.
-        """
-        # NOTE(review): len > 1 also drops one character values such as a
-        # cell_Z of "4"; presumably meant to skip "", "?" and "." - confirm
-        outputs = dict()
-        for name, target_name in self.__attributes_mapper.items():
-            value = getattr(self, name)
-            if not isinstance(value, str):
-                continue
-            text = str(value)
-            if len(text) > 1:
-                outputs[target_name] = text
-        return gemmi.InfoMap(outputs)
-    @typechecked
-    def from_gemmi_structure_infomap(self, infomap: gemmi.InfoMap):
-        """
-        Load the attributes whose mmCIF tag is present in infomap; other tags are ignored.
-        """
-        tag2name = {tag: name for name, tag in self.__attributes_mapper.items()}
-        for tag, val in infomap.items():
-            if tag in tag2name:
-                setattr(self, tag2name[tag], val)
-    @typechecked
-    def __setattr__(self, name: str, value: str):
-        if name == "deposition_date":
-            # validation only: the string itself is what gets stored
-            try:
-                datetime.strptime(value, "%Y-%m-%d")
-            except ValueError as e:
-                raise ValueError(str(e))
-        if not hasattr(self, name):
-            return
-        super().__setattr__(name, value)

gemmi-protools 0.1.16__py3-none-any.whl → 1.0.0__py3-none-any.whl

Potentially problematic release.

gemmi-protools 0.1.16__py3-none-any.whl → 1.0.0__py3-none-any.whl