PyPI - RNApolis - Versions diffs - 0.10.5__py3-none-any.whl → 0.10.7__py3-none-any.whl - Mend

RNApolis 0.10.5__py3-none-any.whl → 0.10.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

rnapolis/adapter.py CHANGED Viewed

@@ -8,7 +8,7 @@ from collections import defaultdict
 from dataclasses import dataclass
 from enum import Enum
 from tempfile import NamedTemporaryFile
-from typing import DefaultDict, Dict, List, Optional, Set, Tuple
+from typing import Any, DefaultDict, Dict, List, Optional, Set, Tuple, Union
 import orjson
@@ -49,6 +49,7 @@ class ExternalTool(Enum):
     BPNET = "bpnet"
     MAXIT = "maxit"
     BARNABA = "barnaba"
+    MCANNOTATE = "mc-annotate"
 logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())
@@ -68,25 +69,30 @@ def auto_detect_tool(external_files: List[str]) -> ExternalTool:
         return ExternalTool.MAXIT
     for file_path in external_files:
+        basename = os.path.basename(file_path)
         # Check for FR3D pattern
-        if file_path.endswith("basepair_detail.txt"):
+        if basename.endswith("basepair_detail.txt"):
             return ExternalTool.FR3D
         # Check for RNAView pattern
-        if file_path.endswith(".out"):
+        if basename.endswith(".out"):
             return ExternalTool.RNAVIEW
         # Check for BPNet pattern
-        if file_path.endswith("basepair.json"):
+        if basename.endswith("basepair.json"):
             return ExternalTool.BPNET
+        # Check for MC-Annotate pattern
+        if basename.endswith("stdout.txt"):
+            return ExternalTool.MCANNOTATE
         # Check for Barnaba pattern
-        basename = os.path.basename(file_path)
         if "pairing" in basename or "stacking" in basename:
             return ExternalTool.BARNABA
         # Check for JSON files (DSSR)
-        if file_path.endswith(".json"):
+        if basename.endswith(".json"):
             return ExternalTool.DSSR
     # Default to MAXIT if no patterns match
@@ -1157,6 +1163,346 @@ def parse_barnaba_output(
     )
+class MCAnnotateAdapter:
+    """Parse MC-Annotate stdout into base pairs, stackings and other interactions.
+    Residue names are looked up by a "<chain><number>[.<icode>]" key, so the
+    names must be collected from the PDB content (append_names) before the
+    MC-Annotate result is parsed. Parsed interactions accumulate on the instance.
+    """
+    # Represents the state of parsing an MC-Annotate result.
+    # Every important part of the file begins with a unique sentence,
+    # which is used here as the enum value.
+    class ParseState(str, Enum):
+        RESIDUES_INFORMATION = "Residue conformations"
+        ADJACENT_STACKINGS = "Adjacent stackings"
+        NON_ADJACENT_STACKINGS = "Non-Adjacent stackings"
+        BASE_PAIRS_SECTION = "Base-pairs"
+        SUMMARY_SECTION = "Number of"
+    # This dictionary maps our model edges
+    # to the edge representation used by MC-Annotate
+    EDGES: Dict[str, Tuple[str, ...]] = {
+        "H": ("Hh", "Hw", "Bh", "C8"),
+        "W": ("Wh", "Ww", "Ws"),
+        "S": ("Ss", "Sw", "Bs"),
+    }
+    # Contains the flattened EDGES values (in one tuple)
+    ALL_EDGES = sum(EDGES.values(), ())
+    # Based on these tokens
+    # BaseRibose and BasePhosphate interactions are created
+    RIBOSE_ATOM = "O2'"
+    PHOSPHATE_ATOM = "O2P"
+    # Single hydrogen bond - for us it's OtherInteraction
+    ONE_HBOND = "one_hbond"
+    # Cis/trans tokens used by MC-Annotate
+    CIS = "cis"
+    TRANS = "trans"
+    # Record names used in PDB files
+    ATOM = "ATOM"
+    HETATM = "HETATM"
+    # This regex captures 6 groups of residue information:
+    # (1) (2) (3) (4) (5) (6)
+    # 1, 4 - chain IDs
+    # 2, 5 - numbers
+    # 3, 6 - icodes (or empty string if no icode)
+    # Example - match and groups:
+    # A-100.X-B200
+    # ('A'), ('-100'), ('X'), ('B'), ('200'), ('')
+    RESIDUE_REGEX = re.compile(
+        r"'?(.)'?(-?[0-9]+)\.?([a-zA-Z]?)-'?(.)'?(-?[0-9]+)\.?([a-zA-Z]?)"
+    )
+    # Characters of Roman numerals used by Saenger
+    # both in our model and MC-Annotate
+    ROMAN_NUMERALS = ("I", "V", "X")
+    # Positions of residue info in PDB ATOM/HETATM lines (0-based)
+    CHAIN_INDEX = 21
+    NUMBER_INDEX = slice(22, 26)
+    ICODE_INDEX = 26
+    NAME_INDEX = slice(17, 20)
+    def __init__(self) -> None:
+        # Names are not present in adjacent and non-adjacent stackings,
+        # so they need to be saved earlier (see append_names)
+        self.names: Dict[str, str] = {}
+        self.base_pairs: List[BasePair] = []
+        self.stackings: List[Stacking] = []
+        self.base_ribose_interactions: List[BaseRibose] = []
+        self.base_phosphate_interactions: List[BasePhosphate] = []
+        self.other_interactions: List[OtherInteraction] = []
+    def classify_edge(self, edge_type: str) -> Optional[str]:
+        """Return the model edge (a key of EDGES) for an MC-Annotate edge name, or None if unknown."""
+        for edge, edges in self.EDGES.items():
+            if edge_type in edges:
+                return edge
+        # Must be an f-string, otherwise the placeholder is logged verbatim
+        logging.warning(f'Edge type "{edge_type}" unknown')
+        return None
+    def get_residue(self, residue_info_list: Tuple[Union[str, Any], ...]) -> Residue:
+        """Build a Residue (auth part only) from a (chain, number, icode) triple.
+        Raises KeyError when the residue was not registered by append_names.
+        """
+        chain = residue_info_list[0]
+        number = int(residue_info_list[1])
+        if residue_info_list[2] == "":
+            icode = None
+            residue_info = f"{chain}{number}"
+        else:
+            icode = residue_info_list[2]
+            residue_info = f"{chain}{number}.{icode}"
+        return Residue(
+            None, ResidueAuth(chain, number, icode, self.names[residue_info])
+        )
+    def get_residues(
+        self, residues_info: str
+    ) -> Tuple[Optional[Residue], Optional[Residue]]:
+        """Parse a "<residue>-<residue>" token; return (None, None) when it does not match RESIDUE_REGEX."""
+        regex_result = self.RESIDUE_REGEX.search(residues_info)
+        if regex_result is None:
+            logging.error(f"MC-Annotate regex failed: {residues_info}")
+            return None, None
+        # RESIDUE_REGEX defines exactly six groups, so groups() always yields
+        # (chain1, number1, icode1, chain2, number2, icode2)
+        residues_info_list = regex_result.groups()
+        residue_left = self.get_residue(residues_info_list[:3])
+        residue_right = self.get_residue(residues_info_list[3:])
+        return residue_left, residue_right
+    def append_stacking(self, line: str, topology_position: int) -> None:
+        """Parse one stacking line and append a Stacking; topology_position is the index of the topology token."""
+        splitted_line = line.split()
+        if len(splitted_line) <= topology_position:
+            # Too few tokens to contain both the residues and the topology
+            logging.warning(f"Could not parse stacking in line: {line}")
+            return
+        topology_info = splitted_line[topology_position]
+        residue_left, residue_right = self.get_residues(splitted_line[0])
+        if residue_left is None or residue_right is None:
+            logging.warning(f"Could not parse residues in line: {line}")
+            return
+        stacking = Stacking(
+            residue_left, residue_right, StackingTopology[topology_info]
+        )
+        self.stackings.append(stacking)
+    def _order_by_token(
+        self, residues: Tuple[Residue, Residue], token: str, atom: str
+    ) -> Tuple[Residue, Residue]:
+        """Swap the residues when the left side of the "left/right" token is the given atom."""
+        if token.split("/", 1)[0] == atom:
+            return residues[1], residues[0]
+        return residues[0], residues[1]
+    def get_ribose_interaction(
+        self, residues: Tuple[Residue, Residue], token: str
+    ) -> BaseRibose:
+        """Create a BaseRibose interaction from a token such as Ss/O2'."""
+        # The base side is preferred first, so swap if the ribose atom is on the left
+        residue_left, residue_right = self._order_by_token(
+            residues, token, self.RIBOSE_ATOM
+        )
+        return BaseRibose(residue_left, residue_right, None)
+    def get_phosphate_interaction(
+        self, residues: Tuple[Residue, Residue], token: str
+    ) -> BasePhosphate:
+        """Create a BasePhosphate interaction from a token such as O2P/Bh."""
+        # The base side is preferred first, so swap if the phosphate atom is on the left
+        residue_left, residue_right = self._order_by_token(
+            residues, token, self.PHOSPHATE_ATOM
+        )
+        return BasePhosphate(residue_left, residue_right, None)
+    def get_base_interaction(
+        self,
+        residues: Tuple[Residue, Residue],
+        token: str,
+        tokens: List[str],
+    ) -> Optional[BasePair]:
+        """Create a BasePair from an edge token (e.g. Ww/Ws) and the remaining line tokens.
+        Returns None when cis/trans is missing or an edge cannot be classified.
+        """
+        if self.CIS in tokens:
+            cis_trans = "c"
+        elif self.TRANS in tokens:
+            cis_trans = "t"
+        else:
+            logging.warning(f"Cis/trans expected, but not present in {tokens}")
+            return None
+        # example saenger: XIX or XII,XIII (only the part before the comma is used)
+        saenger = None
+        for potential_saenger_token in tokens:
+            candidate = potential_saenger_token.split(",")[0]
+            # all() is True for an empty string, so require a non-empty candidate
+            if candidate and all(char in self.ROMAN_NUMERALS for char in candidate):
+                saenger = Saenger[candidate]
+                break
+        left_edge, right_edge = token.split("/", 1)
+        leontis_westhof_left = self.classify_edge(left_edge)
+        leontis_westhof_right = self.classify_edge(right_edge)
+        if leontis_westhof_left is None or leontis_westhof_right is None:
+            return None
+        leontis_westhof = LeontisWesthof[
+            f"{cis_trans}{leontis_westhof_left}{leontis_westhof_right}"
+        ]
+        residue_left, residue_right = residues
+        return BasePair(residue_left, residue_right, leontis_westhof, saenger)
+    def get_other_interaction(
+        self, residues: Tuple[Residue, Residue]
+    ) -> OtherInteraction:
+        """Create an OtherInteraction between the two residues."""
+        return OtherInteraction(residues[0], residues[1])
+    def append_interactions(self, line: str) -> None:
+        """Parse one line of the base-pairs section and append the interactions it describes."""
+        splitted_line = line.split()
+        if not splitted_line:
+            # Nothing to parse on a blank line
+            return
+        residues = self.get_residues(splitted_line[0])
+        if residues[0] is None or residues[1] is None:
+            logging.warning(f"Could not parse residues in line: {line}")
+            return
+        # Assumes that one pair can belong to every interaction type
+        # no more than once!
+        base_added, ribose_added, phosphate_added = False, False, False
+        # example tokens: Ww/Ww pairing antiparallel cis XX
+        tokens: List[str] = splitted_line[3:]
+        # Special case
+        # IF single hydrogen bond and no ribose/phosphate token THEN
+        # append to the OtherInteraction list only
+        if self.ONE_HBOND in tokens and not any(
+            self.RIBOSE_ATOM in token or self.PHOSPHATE_ATOM in token
+            for token in tokens
+        ):
+            self.other_interactions.append(self.get_other_interaction(residues))
+            return
+        for token in tokens:
+            if self.RIBOSE_ATOM in token and not ribose_added:
+                # example token: Ss/O2'
+                ribose_interaction = self.get_ribose_interaction(residues, token)
+                self.base_ribose_interactions.append(ribose_interaction)
+                ribose_added = True
+            elif self.PHOSPHATE_ATOM in token and not phosphate_added:
+                # example token: O2P/Bh
+                phosphate_interaction = self.get_phosphate_interaction(residues, token)
+                self.base_phosphate_interactions.append(phosphate_interaction)
+                phosphate_added = True
+            elif len(token.split("/", 1)) > 1:
+                token_left, token_right = token.split("/", 1)
+                tokens_in_edges = (
+                    token_left in self.ALL_EDGES and token_right in self.ALL_EDGES
+                )
+                if tokens_in_edges and not base_added:
+                    # example token_left: Ww | example token_right: Ws
+                    base_pair_interaction = self.get_base_interaction(
+                        residues, token, tokens
+                    )
+                    if base_pair_interaction is not None:
+                        self.base_pairs.append(base_pair_interaction)
+                    # Marked even when no pair was created, so later edge
+                    # tokens on the same line are not tried again
+                    base_added = True
+    def append_names(self, file_content: str) -> None:
+        """Record the residue name of every ATOM/HETATM line under its "<chain><number>[.<icode>]" key."""
+        for line in file_content.splitlines():
+            if not line.startswith((self.ATOM, self.HETATM)):
+                continue
+            if len(line) <= self.ICODE_INDEX:
+                # Truncated record: the fixed columns read below are missing
+                continue
+            chain = line[self.CHAIN_INDEX].strip()
+            number = line[self.NUMBER_INDEX].strip()
+            icode = line[self.ICODE_INDEX].strip()
+            name = line[self.NAME_INDEX].strip()
+            residue_info = (
+                f"{chain}{number}" if icode == "" else f"{chain}{number}.{icode}"
+            )
+            self.names[residue_info] = name
+    def analyze_by_mc_annotate(
+        self, pdb_content: str, mc_result: str, **_: Dict[str, Any]
+    ) -> Tuple[
+        List[BasePair],
+        List[Stacking],
+        List[BaseRibose],
+        List[BasePhosphate],
+        List[OtherInteraction],
+    ]:
+        """Parse mc_result section by section, using pdb_content for residue names.
+        Returns a tuple (base_pairs, stackings, base_ribose_interactions,
+        base_phosphate_interactions, other_interactions); extra keyword
+        arguments are accepted and ignored.
+        """
+        self.append_names(pdb_content)
+        current_state = None
+        for line in mc_result.splitlines():
+            if not line.strip():
+                # Blank lines carry no data and would break token indexing
+                continue
+            header = next(
+                (state for state in self.ParseState if line.startswith(state.value)),
+                None,
+            )
+            if header is not None:
+                # A section header only switches the state; it holds no data
+                current_state = header
+                continue
+            if current_state == self.ParseState.ADJACENT_STACKINGS:
+                # example line: X4.E-X5.F : adjacent_5p upward
+                self.append_stacking(line, 3)
+            elif current_state == self.ParseState.NON_ADJACENT_STACKINGS:
+                # example line: Y40.M-Y67.N : inward pairing
+                self.append_stacking(line, 2)
+            elif current_state == self.ParseState.BASE_PAIRS_SECTION:
+                # example line: Y38.K-Y51.X : A-U Ww/Ww pairing antiparallel cis XX
+                self.append_interactions(line)
+            # RESIDUES_INFORMATION (e.g. "X7.H : G C3p_endo anti") and
+            # SUMMARY_SECTION (e.g. "Number of non adjacent stackings = 26")
+            # lines are skipped on purpose
+        return (
+            self.base_pairs,
+            self.stackings,
+            self.base_ribose_interactions,
+            self.base_phosphate_interactions,
+            self.other_interactions,
+        )
+def parse_mcannotate_output(
+    file_paths: List[str], structure3d: Structure3D
+) -> BaseInteractions:
+    """
+    Turn MC-Annotate results into BaseInteractions.
+    Two entries are needed in file_paths: the captured MC-Annotate stdout
+    (basename ending with "stdout.txt") and a PDB file (path ending with ".pdb").
+    An empty BaseInteractions is returned when either is missing or unreadable.
+    """
+    # When several paths match, the last one in the list is used
+    stdout_candidates = [
+        path for path in file_paths if os.path.basename(path).endswith("stdout.txt")
+    ]
+    pdb_candidates = [path for path in file_paths if path.endswith(".pdb")]
+    stdout_file = stdout_candidates[-1] if stdout_candidates else None
+    structure_file = pdb_candidates[-1] if pdb_candidates else None
+    if not stdout_file:
+        logging.warning("No stdout.txt file found for mc-annotate.")
+        return BaseInteractions([], [], [], [], [])
+    if not structure_file:
+        logging.warning("No PDB file found for mc-annotate.")
+        return BaseInteractions([], [], [], [], [])
+    logging.info(f"Processing mc-annotate stdout file: {stdout_file}")
+    logging.info(f"Using structure file for residue names: {structure_file}")
+    # Read the stdout file first, then the structure file
+    contents = []
+    try:
+        for path in (stdout_file, structure_file):
+            with open(path, "r") as f:
+                contents.append(f.read())
+    except Exception as e:
+        logging.warning(f"Could not read input files for mc-annotate: {e}")
+        return BaseInteractions([], [], [], [], [])
+    mc_result, pdb_content = contents
+    # The adapter returns the five interaction lists in the order
+    # that from_structure3d expects after structure3d
+    interactions = MCAnnotateAdapter().analyze_by_mc_annotate(pdb_content, mc_result)
+    return BaseInteractions.from_structure3d(structure3d, *interactions)
 def parse_external_output(
     file_paths: List[str], tool: ExternalTool, structure3d: Structure3D
 ) -> BaseInteractions:
@@ -1183,6 +1529,8 @@ def parse_external_output(
         return parse_rnaview_output(file_paths, structure3d)
     elif tool == ExternalTool.BARNABA:
         return parse_barnaba_output(file_paths, structure3d)
+    elif tool == ExternalTool.MCANNOTATE:
+        return parse_mcannotate_output(file_paths, structure3d)
     else:
         raise ValueError(f"Unsupported external tool: {tool}")
@@ -1259,6 +1607,9 @@ def process_external_tool_output(
     if not external_file_paths:
         # For MAXIT or when no external files are provided, use the input file
         file_paths_to_process = [input_file_path]
+    elif tool == ExternalTool.MCANNOTATE:
+        # MC-Annotate requires both the stdout and the PDB file
+        file_paths_to_process = external_file_paths + [input_file_path]
     else:
         # Process all external files
         file_paths_to_process = external_file_paths

rnapolis/common.py CHANGED Viewed

@@ -1084,33 +1084,41 @@ class BaseInteractions:
         base_phosphate_interactions: List[BasePhosphate],
         other_interactions: List[OtherInteraction],
     ) -> "BaseInteractions":
-        auth2residue3d = {}
-        auth2label = {}
-        label2auth = {}
+        cni2residue = {}
+        cni2label = {}
+        cni2auth = {}
         for residue3d in structure3d.residues:
-            auth2residue3d[residue3d.auth] = residue3d
-            auth2label[residue3d.auth] = residue3d.label
-            label2auth[residue3d.label] = residue3d.auth
+            cni = (residue3d.chain, residue3d.number, residue3d.icode or None)
+            cni2auth[cni] = residue3d.auth
+            cni2label[cni] = residue3d.label
+            cni2residue[cni] = residue3d
         def unify_nt(nt: Residue) -> Residue:
             if nt.auth is not None and nt.label is not None:
                 return nt
+            cni = (nt.chain, nt.number, nt.icode or None)
             if nt.auth is not None:
-                return Residue(label=auth2label.get(nt.auth, None), auth=nt.auth)
+                return Residue(label=cni2label.get(cni, None), auth=nt.auth)
             if nt.label is not None:
-                return Residue(label=nt.label, auth=label2auth.get(nt.label, None))
+                return Residue(label=nt.label, auth=cni2auth.get(cni, None))
             return nt
         base_pairs_new = []
         for base_pair in base_pairs:
             nt1 = unify_nt(base_pair.nt1)
             nt2 = unify_nt(base_pair.nt2)
-            saenger = base_pair.saenger or Saenger.from_leontis_westhof(
-                auth2residue3d[nt1.auth].one_letter_name,
-                auth2residue3d[nt2.auth].one_letter_name,
-                base_pair.lw,
-            )
+            cni1 = (nt1.chain, nt1.number, nt1.icode or None)
+            cni2 = (nt2.chain, nt2.number, nt2.icode or None)
+            if cni1 not in cni2residue or cni2 not in cni2residue:
+                saenger = base_pair.saenger
+            else:
+                saenger = base_pair.saenger or Saenger.from_leontis_westhof(
+                    cni2residue[cni1].one_letter_name,
+                    cni2residue[cni2].one_letter_name,
+                    base_pair.lw,
+                )
             if (
                 nt1 != base_pair.nt1
                 or nt2 != base_pair.nt2

{rnapolis-0.10.5.dist-info → rnapolis-0.10.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: RNApolis
-Version: 0.10.5
+Version: 0.10.7
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.10.5.dist-info → rnapolis-0.10.7.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
-rnapolis/adapter.py,sha256=0Awt6owSeGcWq8kgLtkDv19suhrmcJgQip_Hi4Y3tK4,48513
+rnapolis/adapter.py,sha256=6hJTweIqUXH8CEGvi8oupFzk5etkIt8Q2bqRvgsqako,62169
 rnapolis/aligner.py,sha256=o7rQyjAZ3n4VXcnSPY3HVB8nLNRkVbl552O3NVh0mfg,3429
 rnapolis/annotator.py,sha256=HA2hfEUXdmBElObqRlASAB1FgkysjiHgwMTjEhsDiDE,30277
 rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
-rnapolis/common.py,sha256=hamlW892ZF5A0dSWsl7cOCZqOpbVQMgXjVPYDFzk3pE,36347
+rnapolis/common.py,sha256=qifqTIiq43jeR1xKK3301PbRMo7vaZgjQauG-G7asSc,36686
 rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
 rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
 rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
@@ -22,9 +22,9 @@ rnapolis/tertiary_v2.py,sha256=SgijTv0bPqMJwsMqyQk0O8QAnS2Ozk45vk8igxt9hRs,38001
 rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
 rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-rnapolis-0.10.5.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
-rnapolis-0.10.5.dist-info/METADATA,sha256=LbY2r44uwiIoVASkHv1_TAwwKIOCi8Az33PDuTs7gdg,54611
-rnapolis-0.10.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rnapolis-0.10.5.dist-info/entry_points.txt,sha256=MZMWnYBUYnis-zWDmFfuA5yXtU3W5YdQrm5HA5LrkeM,474
-rnapolis-0.10.5.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
-rnapolis-0.10.5.dist-info/RECORD,,
+rnapolis-0.10.7.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+rnapolis-0.10.7.dist-info/METADATA,sha256=QPuGPZ96VIjvQPiLkk4bS4vstWqO6cok6e4vID33vg0,54611
+rnapolis-0.10.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rnapolis-0.10.7.dist-info/entry_points.txt,sha256=MZMWnYBUYnis-zWDmFfuA5yXtU3W5YdQrm5HA5LrkeM,474
+rnapolis-0.10.7.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+rnapolis-0.10.7.dist-info/RECORD,,

{rnapolis-0.10.5.dist-info → rnapolis-0.10.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{rnapolis-0.10.5.dist-info → rnapolis-0.10.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rnapolis-0.10.5.dist-info → rnapolis-0.10.7.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{rnapolis-0.10.5.dist-info → rnapolis-0.10.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

RNApolis 0.10.5__py3-none-any.whl → 0.10.7__py3-none-any.whl

RNApolis 0.10.5__py3-none-any.whl → 0.10.7__py3-none-any.whl