gemmi-protools 0.1.17__py3-none-any.whl → 1.0.1__py3-none-any.whl
- gemmi_protools/__init__.py +1 -4
- gemmi_protools/io/convert.py +0 -3
- gemmi_protools/io/reader.py +752 -309
- gemmi_protools/{utils → tools}/align.py +38 -54
- gemmi_protools/tools/dockq.py +128 -0
- gemmi_protools/tools/mesh.py +197 -0
- gemmi_protools/{utils → tools}/pdb_annot.py +21 -105
- {gemmi_protools-0.1.17.dist-info → gemmi_protools-1.0.1.dist-info}/METADATA +20 -12
- gemmi_protools-1.0.1.dist-info/RECORD +19 -0
- gemmi_protools/io/cif_opts.py +0 -173
- gemmi_protools/io/parse_pdb_header.py +0 -387
- gemmi_protools/io/parser.py +0 -292
- gemmi_protools/io/pdb_opts.py +0 -179
- gemmi_protools/io/peptide.py +0 -32
- gemmi_protools/io/struct_info.py +0 -91
- gemmi_protools/utils/dockq.py +0 -139
- gemmi_protools/utils/fixer.py +0 -274
- gemmi_protools/utils/immune_complex.py +0 -787
- gemmi_protools/utils/ppi.py +0 -74
- gemmi_protools-0.1.17.dist-info/RECORD +0 -27
- /gemmi_protools/{utils → tools}/__init__.py +0 -0
- {gemmi_protools-0.1.17.dist-info → gemmi_protools-1.0.1.dist-info}/WHEEL +0 -0
- {gemmi_protools-0.1.17.dist-info → gemmi_protools-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {gemmi_protools-0.1.17.dist-info → gemmi_protools-1.0.1.dist-info}/top_level.txt +0 -0
gemmi_protools/{utils → tools}/align.py

@@ -2,33 +2,26 @@
 @Author: Luo Jiejian
 """
 import os
-import pathlib
 import re
 import shutil
 import subprocess
 import tempfile
-import uuid
-from copy import deepcopy
-from typing import Union, Dict, Any, List, Optional
+from typing import Dict, Any, List, Optional
 
 import numpy as np
 from Bio.PDB import Superimposer
-from typeguard import typechecked
 
 from gemmi_protools.io.convert import gemmi2bio, bio2gemmi
 from gemmi_protools.io.reader import StructureParser
 
 
 class StructureAligner(object):
-
-    def __init__(self, query_path: Union[str, pathlib.Path], ref_path: Union[str, pathlib.Path]):
+    def __init__(self, query_path: str, ref_path: str):
         self._query_st = StructureParser()
         self._query_st.load_from_file(query_path)
-        self._query_st.set_default_model()
 
         self._ref_st = StructureParser()
         self._ref_st.load_from_file(ref_path)
-        self._ref_st.set_default_model()
 
         self.values = dict()
         self.rot_mat = None
@@ -49,8 +42,7 @@ class StructureAligner(object):
         return _path
 
     @staticmethod
-
-    def __parser_rotation_matrix(matrix_file: Union[str, pathlib.Path]):
+    def __parser_rotation_matrix(matrix_file: str):
         rotation_matrix = []
         translation_vector = []
 
@@ -66,7 +58,6 @@ class StructureAligner(object):
                     T=np.array(translation_vector).astype(np.float32))
 
     @staticmethod
-    @typechecked
     def __parse_terminal_outputs(output_string: str) -> Dict[str, Any]:
         lines = re.split(pattern=r"\n", string=output_string)
         # chain mapping
@@ -108,7 +99,6 @@ class StructureAligner(object):
                 del patterns[key]
         return values
 
-    @typechecked
     def make_alignment(self, query_chains: Optional[List[str]] = None,
                        ref_chains: Optional[List[str]] = None, timeout=300.0):
         """
@@ -122,56 +112,50 @@ class StructureAligner(object):
         program_path = self.__mmalign_path
 
         # clone
-        q_st = deepcopy(self._query_st)
-        r_st = deepcopy(self._ref_st)
-
-        tmp_dir = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
-        os.makedirs(tmp_dir)
-
         if isinstance(query_chains, list):
-            q_st.pick_chains(query_chains)
+            q_st = self._query_st.pick_chains(query_chains)
+        else:
+            q_st = self._query_st
 
         if isinstance(ref_chains, list):
-            r_st.pick_chains(
+            r_st = self._ref_st.pick_chains(query_chains)
+        else:
+            r_st = self._ref_st
 
-        q_ch_mapper = q_st.
-        r_ch_mapper = r_st.
+        q_ch_mapper = q_st.make_one_letter_chain()
+        r_ch_mapper = r_st.make_one_letter_chain()
 
         q_ch_mapper_r = {v: k for k, v in q_ch_mapper.items()}
         r_ch_mapper_r = {v: k for k, v in r_ch_mapper.items()}
 
-
-
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            _tmp_a = os.path.join(tmp_dir, "a.pdb")
+            q_st.to_pdb(_tmp_a)
+
+            _tmp_b = os.path.join(tmp_dir, "b.pdb")
+            r_st.to_pdb(_tmp_b)
+
+            matrix_file = os.path.join(tmp_dir, "m.txt")
+            _command = "%s %s %s -m %s" % (program_path, _tmp_a, _tmp_b, matrix_file)
+
+            try:
+                result = subprocess.run(_command, shell=True, check=True,
+                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                        timeout=timeout)
+            except Exception as e:
+                print("%s: between files %s and %s; between chains: %s and %s" % (
+                    str(e), self.query_path, self.ref_path,
+                    str(q_st.chain_ids), str(r_st.chain_ids))
+                )
+            else:
+                self.values = self.__parse_terminal_outputs(result.stdout.decode())
+                self.rot_mat = self.__parser_rotation_matrix(matrix_file)
+                self.is_aligned = True
+                self.by_query = q_st.chain_ids if query_chains is None else query_chains
+                self.by_ref = r_st.chain_ids if ref_chains is None else ref_chains
+                self.values["query_chain_ids"] = [q_ch_mapper_r.get(ch, ch) for ch in self.values["query_chain_ids"]]
+                self.values["ref_chain_ids"] = [r_ch_mapper_r.get(ch, ch) for ch in self.values["ref_chain_ids"]]
 
-        _tmp_b = os.path.join(tmp_dir, "b.pdb")
-        r_st.to_pdb(_tmp_b)
-
-        matrix_file = os.path.join(tmp_dir, "m.txt")
-        _command = "%s %s %s -m %s" % (program_path, _tmp_a, _tmp_b, matrix_file)
-
-        try:
-            result = subprocess.run(_command, shell=True, check=True,
-                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                                    timeout=timeout)
-        except Exception as e:
-            print("%s: between files %s and %s; between chains: %s and %s" % (
-                str(e), self.query_path, self.ref_path,
-                str(q_st.chain_ids), str(r_st.chain_ids))
-            )
-        else:
-            self.values = self.__parse_terminal_outputs(result.stdout.decode())
-            self.rot_mat = self.__parser_rotation_matrix(matrix_file)
-            self.is_aligned = True
-            self.by_query = q_st.chain_ids if query_chains is None else query_chains
-            self.by_ref = r_st.chain_ids if ref_chains is None else ref_chains
-            self.values["query_chain_ids"] = [q_ch_mapper_r.get(ch, ch) for ch in self.values["query_chain_ids"]]
-            self.values["ref_chain_ids"] = [r_ch_mapper_r.get(ch, ch) for ch in self.values["ref_chain_ids"]]
-
-        finally:
-            if os.path.isdir(tmp_dir):
-                shutil.rmtree(tmp_dir)
-
-    @typechecked
     def save_aligned_query(self, out_file: str):
         """
 
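
For orientation, a minimal usage sketch of the StructureAligner API as it stands after this change (plain str paths, optional chain subsets, results exposed on .values and .rot_mat). The file names and chain IDs below are placeholders, and the MM-align binary resolved internally by the class must be available:

from gemmi_protools.tools.align import StructureAligner

# query and reference structure files (placeholder paths); 1.0.1 takes str, not pathlib.Path
aligner = StructureAligner("query_model.pdb", "reference.cif")

# align only selected chains; both chain lists are optional and default to all chains
aligner.make_alignment(query_chains=["A", "B"], ref_chains=["H", "L"], timeout=120.0)

if getattr(aligner, "is_aligned", False):
    print(aligner.values["query_chain_ids"])   # chain IDs mapped back to their original names
    print(aligner.rot_mat)                     # rotation/translation parsed from the MM-align matrix file
    aligner.save_aligned_query("query_aligned.pdb")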
gemmi_protools/tools/dockq.py

@@ -0,0 +1,128 @@
+"""
+@Author: Luo Jiejian
+"""
+import json
+import os
+import shutil
+import subprocess
+import tempfile
+from copy import deepcopy
+from typing import List, Tuple
+
+import gemmi
+import pandas as pd
+
+from gemmi_protools.io.reader import StructureParser
+
+
+def dockq_score_interface(query_model: str,
+                          native_model: str,
+                          partner_1_mapping: List[Tuple[str, str]],
+                          partner_2_mapping: List[Tuple[str, str]],
+                          ):
+    """
+    Calculate Dockq Score for an interface (partner 1 vs partner 2)
+
+    :param query_model: str
+        path of query model, support .pdb, .pdb.gz, .cif, .cif.gz
+    :param native_model:
+    :param partner_1_mapping: a list of chain ID mapping between query and native for partner1 of the interface
+        e.g. [(q chain1, n chain1), (q chain2, n chain2)]
+    :param partner_2_mapping:
+    :return:
+    """
+    dockq_program = shutil.which("DockQ")
+    if dockq_program is None:
+        raise RuntimeError("DockQ is need")
+
+    assert len(partner_1_mapping) > 0, "partner_1_mapping must be a list of chain ID tuples, can't be empty"
+    assert len(partner_2_mapping) > 0, "partner_2_mapping must be a list of chain ID tuples, can't be empty"
+
+    def load_struct(path: str, partner_1: List[str], partner_2: List[str]):
+        st = StructureParser()
+        st.load_from_file(path)
+        st.clean_structure()
+
+        for ch in partner_1 + partner_2:
+            if ch not in st.chain_ids:
+                raise ValueError("Chain %s not found for %s (only [%s])" % (ch, path, " ".join(st.chain_ids)))
+
+        # merge chains in each each partner into on chain
+        # partner_1 with chain ID A
+        # partner_2 with chain ID B
+
+        chain_a = gemmi.Chain("A")
+        idx_a = 1
+        for ch in partner_1:
+            for res in st.get_chain(ch):
+                nr = deepcopy(res)
+                nr.seqid.icode = " "
+                nr.seqid.num = idx_a
+                chain_a.add_residue(nr)
+                idx_a += 1
+
+        chain_b = gemmi.Chain("B")
+        idx_b = 1
+        for ch in partner_2:
+            for res in st.get_chain(ch):
+                nr = deepcopy(res)
+                nr.seqid.icode = " "
+                nr.seqid.num = idx_b
+                chain_b.add_residue(nr)
+                idx_b += 1
+
+        model = gemmi.Model(1)
+        model.add_chain(chain_a)
+        model.add_chain(chain_b)
+
+        struct = gemmi.Structure()
+        struct.add_model(model)
+
+        output = StructureParser(struct)
+        return output
+
+    partner_1_query, partner_1_native = list(zip(*partner_1_mapping))
+    partner_2_query, partner_2_native = list(zip(*partner_2_mapping))
+
+    q_st = load_struct(query_model, list(partner_1_query), list(partner_2_query))
+    n_st = load_struct(native_model, list(partner_1_native), list(partner_2_native))
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        result_file = os.path.join(tmp_dir, "result.json")
+        q_file = os.path.join(tmp_dir, "q.pdb")
+        n_file = os.path.join(tmp_dir, "n.pdb")
+        q_st.to_pdb(q_file, write_minimal_pdb=True)
+        n_st.to_pdb(n_file, write_minimal_pdb=True)
+
+        mapping = "AB:AB"
+
+        _command = "%s --mapping %s --json %s %s %s" % (dockq_program, mapping, result_file, q_file, n_file)
+        metrics = ['DockQ', 'F1', 'chain1', 'chain2']
+
+        try:
+            _ = subprocess.run(_command, shell=True, check=True,
+                               stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                               timeout=300.0)
+        except subprocess.CalledProcessError as e:
+            # Handle errors in the called executable
+            msg = e.stderr.decode()
+            outputs = pd.DataFrame(columns=metrics)
+        except Exception as e:
+            # Handle other exceptions such as file not found or permissions issues
+            msg = str(e)
+            outputs = pd.DataFrame(columns=metrics)
+        else:
+            with open(result_file, "r") as fin:
+                vals = json.load(fin)
+            msg = "Finished"
+            result = []
+            for v in vals["best_result"].values():
+                result.append(v)
+            outputs = pd.DataFrame(result)[metrics]
+
+    if len(outputs) > 0:
+        score = "%.4f" % outputs.iloc[0]["DockQ"]
+    else:
+        score = ""
+
+    return dict(score=score, status=msg)
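
A hedged example of calling the new DockQ helper; the model paths and chain IDs are placeholders, and the DockQ command-line tool must be discoverable on PATH (the function locates it with shutil.which("DockQ")):

from gemmi_protools.tools.dockq import dockq_score_interface

# partner mappings are (query_chain, native_chain) tuples; each partner's chains are merged into one chain
out = dockq_score_interface(query_model="predicted.pdb",
                            native_model="native.cif",
                            partner_1_mapping=[("A", "H"), ("B", "L")],
                            partner_2_mapping=[("C", "A")])

print(out["score"])    # DockQ of the best mapping formatted to 4 decimals, or "" on failure
print(out["status"])   # "Finished" on success, otherwise the captured error message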
gemmi_protools/tools/mesh.py

@@ -0,0 +1,197 @@
+"""
+@Author: Luo Jiejian
+"""
+import os
+import subprocess
+import tempfile
+from collections import defaultdict
+from typing import List, Optional, Union
+
+import freesasa
+import numpy as np
+import trimesh
+from Bio.PDB import Selection
+from Bio.PDB.ResidueDepth import _get_atom_radius, _read_vertex_array
+
+from gemmi_protools import StructureParser
+from gemmi_protools import gemmi2bio
+
+
+def _read_face_array(filename: str):
+    with open(filename) as fp:
+        face_list = []
+        for line in fp:
+            sl = line.split()
+            if len(sl) != 5:
+                # skip header
+                continue
+            vl = [int(x) for x in sl[0:3]]
+            face_list.append(vl)
+    return np.array(face_list)
+
+
+def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "msms"):
+    """
+
+    :param struct_file: str
+        .pdb, .cif, .pdb.gz, .cif.gz
+    :param chains: a list of chain names
+        default None to include all chains
+    :param MSMS: str
+        path of msms executable
+    :return:
+        https://ccsb.scripps.edu/msms/downloads/
+    """
+    xyz_tmp = tempfile.NamedTemporaryFile(delete=False).name
+    surface_tmp = tempfile.NamedTemporaryFile(delete=False).name
+    msms_tmp = tempfile.NamedTemporaryFile(delete=False).name
+    face_file = surface_tmp + ".face"
+    surface_file = surface_tmp + ".vert"
+
+    try:
+        st = StructureParser()
+        st.load_from_file(struct_file)
+        st.clean_structure(remove_ligand=True)
+
+        if chains is None:
+            st_p = st
+        else:
+            for ch in chains:
+                if ch not in st.chain_ids:
+                    raise ValueError("Chain %s not found (only [%s])" % (ch, " ".join(st.chain_ids)))
+            st_p = st.pick_chains(chains)
+
+        bio_st = gemmi2bio(st_p.STRUCT)
+        model = bio_st[0]
+
+        # Replace pdb_to_xyzr
+        # Make x,y,z,radius file
+        atom_list = Selection.unfold_entities(model, "A")
+
+        with open(xyz_tmp, "w") as pdb_to_xyzr:
+            for atom in atom_list:
+                x, y, z = atom.coord
+                radius = _get_atom_radius(atom, rtype="united")
+                pdb_to_xyzr.write(f"{x:6.3f}\t{y:6.3f}\t{z:6.3f}\t{radius:1.2f}\n")
+
+        # Make surface
+        MSMS = MSMS + " -no_header -probe_radius 1.5 -if %s -of %s > " + msms_tmp
+        make_surface = MSMS % (xyz_tmp, surface_tmp)
+        subprocess.call(make_surface, shell=True)
+        if not os.path.isfile(surface_file):
+            raise RuntimeError(
+                f"Failed to generate surface file using command:\n{make_surface}"
+            )
+
+    except Exception as e:
+        print(str(e))
+        mesh = None
+    else:
+        # Read surface vertices from vertex file
+        vertices = _read_vertex_array(surface_file)
+        faces = _read_face_array(face_file)
+        mesh = trimesh.Trimesh(vertices=vertices, faces=faces - 1)
+        mesh.merge_vertices()
+        mesh.update_faces(mesh.unique_faces())
+        mesh.update_faces(mesh.nondegenerate_faces())
+        mesh.remove_unreferenced_vertices()
+
+    # Remove temporary files
+    for fn in [xyz_tmp, surface_tmp, msms_tmp, face_file, surface_file]:
+        try:
+            os.remove(fn)
+        except OSError:
+            pass
+
+    return mesh
+
+
+def get_surface_residues(struct_file: str,
+                         chains: Optional[List[str]] = None,
+                         relative_sasa_cutoff: Union[int, float] = 0.15):
+    ####################
+    # check and pick
+    ####################
+    st = StructureParser()
+    st.load_from_file(struct_file)
+    st.clean_structure()
+
+    if chains is None:
+        chains = st.chain_ids
+
+    if isinstance(chains, list):
+        if len(chains) == 0:
+            raise ValueError("chains is not set")
+        else:
+            # check if chains valid
+            for ch in chains:
+                if ch not in st.chain_ids:
+                    raise ValueError("Chain %s not found" % ch)
+
+    st_p = st.pick_chains(chains)
+    # sequences = {k: s.replace("-", "").upper() for k, s in st_p.polymer_sequences().items()}
+
+    # start from 1
+    seq_num_mapper = dict()
+    for chain in st_p.MODEL:
+        for i, res in enumerate(chain):
+            key = (chain.name, str(res.seqid.num) + res.seqid.icode.strip(), res.name)
+            seq_num_mapper[key] = i + 1
+
+    # make one upper letter chain ID
+    mapper = st_p.make_one_letter_chain(only_uppercase=True)
+    mapper_r = {v: k for k, v in mapper.items()}
+
+    ####################
+    # save to pdb
+    ####################
+    with tempfile.NamedTemporaryFile(delete=True, suffix=".pdb", mode='w') as tmp_file:
+        st_p.to_pdb(tmp_file.name)
+        structure = freesasa.Structure(tmp_file.name)
+
+    result = freesasa.calc(structure)
+
+    residue_areas = result.residueAreas()
+
+    surface_residues_relative_sasa = dict()
+    surface_atoms = defaultdict(list)
+    for atom_index in range(structure.nAtoms()):
+        ch = structure.chainLabel(atom_index)
+        ch = mapper_r.get(ch, ch)
+
+        res_num = structure.residueNumber(atom_index).strip()
+        res_name = structure.residueName(atom_index)
+        atom_sasa = result.atomArea(atom_index)
+
+        res_id = (ch, res_num, res_name)
+        res_relative_total = residue_areas[ch][res_num].relativeTotal
+        if res_relative_total > relative_sasa_cutoff:
+            if res_id not in surface_residues_relative_sasa:
+                surface_residues_relative_sasa[res_id] = res_relative_total
+            if atom_sasa > 0:
+                atom_name = structure.atomName(atom_index).strip()
+                pos = structure.coord(atom_index)
+                surface_atoms[res_id].append((atom_sasa, atom_name, pos))
+
+    results = []
+    for res_id, query_atoms in surface_atoms.items():
+        seq_loc = seq_num_mapper[res_id]
+
+        query_atoms.sort(reverse=True)
+        centroid = tuple(np.array([a[2] for a in query_atoms[0:3]]).mean(axis=0).tolist())
+        results.append((res_id[0],
+                        res_id[1],
+                        res_id[2],
+                        seq_loc,
+                        centroid,
+                        surface_residues_relative_sasa[res_id]
+                        )
+                       )
+    dtype = [("chain_name", "U5"),
+             ("residue_numi", "U8"),
+             ("residue_name", "U5"),
+             ("sequential_residue_num", "i4"),
+             ("centroid", ("f4", (3,))),
+             ("relative_sasa", "f4"),
+             ]
+    return np.array(results, dtype=dtype)
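
A hedged sketch of the two new surface utilities; the structure file is a placeholder, get_mesh needs the MSMS executable (https://ccsb.scripps.edu/msms/downloads/) on disk, and get_surface_residues relies on freesasa:

from gemmi_protools.tools.mesh import get_mesh, get_surface_residues

# molecular surface as a trimesh.Trimesh, or None if the MSMS call fails
mesh = get_mesh("example.cif.gz", chains=["A"], MSMS="/path/to/msms")
if mesh is not None:
    print(mesh.vertices.shape, mesh.faces.shape)

# structured array of solvent-exposed residues (relative SASA above the cutoff)
surf = get_surface_residues("example.cif.gz", chains=["A"], relative_sasa_cutoff=0.15)
print(surf.dtype.names)   # chain_name, residue_numi, residue_name, sequential_residue_num, centroid, relative_sasa
print(surf["chain_name"][:5], surf["relative_sasa"][:5])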
gemmi_protools/{utils → tools}/pdb_annot.py

@@ -2,25 +2,18 @@
 @Author: Luo Jiejian
 """
 import hashlib
-import itertools
 import os
 import re
 import shutil
 import subprocess
 import uuid
 from collections import defaultdict
-from dataclasses import asdict
 from importlib.resources import files
-from typing import List
 
-import numpy as np
 from anarci import run_anarci
 from anarci.germlines import all_germlines
-from joblib import Parallel, delayed
-from scipy.spatial import cKDTree
 
 from gemmi_protools import StructureParser
-from gemmi_protools.utils.ppi import _ppi_atoms
 
 
 def hash_sequence(seq: str) -> str:
@@ -207,125 +200,48 @@ def annotate_mhc(seq_dict: dict):
     return out
 
 
-def _interface_residues(struct: StructureParser,
-                        chains_x: List[str],
-                        chains_y: List[str],
-                        threshold: float = 4.5):
-    """
-    identify PPI among protein, DNA, RNA
-    :param struct: StructureParser
-    :param chains_x:
-    :param chains_y:
-    :param threshold:
-    :return:
-        PPI residues of chains_x, PPI residues of chains_y
-    """
-
-    x_coord, x_id = _ppi_atoms(struct, chains_x)
-    y_coord, y_id = _ppi_atoms(struct, chains_y)
-
-    kd_tree_x = cKDTree(x_coord)
-    kd_tree_y = cKDTree(y_coord)
-
-    pairs = kd_tree_x.sparse_distance_matrix(kd_tree_y, threshold, output_type='coo_matrix')
-
-    x_res = np.unique(x_id[pairs.row][["ch_name", 'res_num', 'res_icode', 'res_name']])
-    y_res = np.unique(y_id[pairs.col][["ch_name", 'res_num', 'res_icode', 'res_name']])
-
-    x_out = ["%s/%d/%s/%s" % (a, b, c.strip(), d) for a, b, c, d in x_res.tolist()]
-    y_out = ["%s/%d/%s/%s" % (a, b, c.strip(), d) for a, b, c, d in y_res.tolist()]
-    return x_out, y_out
-
-
-def polymer_interface_residues(struct: StructureParser,
-                               ppi_threshold: float = 4.5,
-                               n_cpus: int = 1,
-                               ):
-    """
-
-    Args:
-        struct:
-        ppi_threshold:
-
-    Returns:
-
-    """
-    chains = [ch for ch, ct in struct.chain_types.items() if ct in ["protein", "dna", "rna"]]
-    ch_pairs = list(itertools.combinations(chains, r=2))
-    ch_pairs.sort()
-
-    def _run(ch_1, ch_2):
-        key = "%s/%s" % (ch_1, ch_2)
-        res_x, res_y = _interface_residues(struct, chains_x=[ch_1], chains_y=[ch_2], threshold=ppi_threshold)
-        if len(res_x) > 0:
-            return {key: [res_x, res_y]}
-        else:
-            return dict()
-
-    cpu2use = max(min(n_cpus, len(ch_pairs)), 1)
-
-    outputs = dict()
-    if cpu2use == 1 or len(ch_pairs) < 100:
-        for ch_1, ch_2 in ch_pairs:
-            outputs.update(_run(ch_1, ch_2))
-    else:
-        results = Parallel(n_jobs=cpu2use)(delayed(_run)(c1, c2) for c1, c2 in ch_pairs)
-        for item in results:
-            outputs.update(item)
-    return outputs
-
-
-def annotate_pdb(struct_file: str, ppi_threshold: float = 4.5,
-                 n_cpus: int = 1, max_seqs: int = 100):
+def annotate_pdb(struct_file: str):
     st = StructureParser()
     st.load_from_file(struct_file)
-    st.
-    st.STRUCT.remove_alternative_conformations()
-    st.STRUCT.remove_ligands_and_waters()
-    st.STRUCT.remove_hydrogens()
-    st.STRUCT.remove_empty_chains()
-    st.update_entity()
+    st.clean_structure()
 
-
-
+    subchain_id2entity_id = dict()
+    for ent in st.STRUCT.entities:
+        for ch in ent.subchains:
+            subchain_id2entity_id[ch] = ent.name
 
     # Merge sequences
     polymers = dict()
-    for ch, seq in st.polymer_sequences.items():
-
+    for ch, seq in st.polymer_sequences().items():
+        subchain_id = st.get_chain(ch).get_polymer().subchain_id()
+        entity_id = subchain_id2entity_id[subchain_id]
+
+        hash_id = hash_sequence(seq.upper())
         if hash_id not in polymers:
             val = dict(chain_ids=[ch],
-                       sequence=seq,
-                       type=st.
-                       description=st.
-                       specie=st.ENTITY.eid2specie.get(st.ENTITY.polymer2eid[ch], "Unknown"),
-                       taxid=st.ENTITY.eid2taxid.get(st.ENTITY.polymer2eid[ch], "Unknown"),
+                       sequence=seq.upper(),
+                       type=st.polymer_types[ch].name,
+                       description=st.INFO["description"].get(entity_id, "Unknown"),
                        )
             polymers[hash_id] = val
         else:
            polymers[hash_id]["chain_ids"].append(ch)
 
-
-
+    proteins = dict()
     results = dict()
-    for
+    for hash_id, val in polymers.items():
        val["chain_ids"].sort()
-        if val["type"] == "
+        if val["type"] == "PeptideL":
+            proteins[hash_id] = val["sequence"]
            anarci_info = get_fv_region(val["sequence"])
            fvt = fv_region_type(anarci_info)
            if fvt != "not-Fv":
-                results[
-
-    struct_info = asdict(st.INFO)
-    struct_info.update(resolution=st.STRUCT.resolution)
-    struct_info["pdb_id"] = struct_info["pdb_id"].lower()
-    struct_info["exp_method"] = struct_info["exp_method"].lower()
+                results[hash_id] = dict(fv_type=fvt, annotations=anarci_info)
 
+    struct_info = {k: st.INFO[k] for k in ["resolution", "pdb_id", "deposition_date", "method", "title"]}
     return dict(path=os.path.abspath(os.path.expanduser(struct_file)),
                 info=struct_info,
                 polymers=polymers,
                 anarci=results,
-                mhc=annotate_mhc(
-                interfaces=polymer_interface_residues(st, ppi_threshold,
-                                                      n_cpus=n_cpus)
+                mhc=annotate_mhc(proteins) if len(proteins) > 0 else dict(),
                 )