PyPI - RNApolis - Version diff - RNApolis-0.4.4-py3-none-any.whl → RNApolis-0.4.7-py3-none-any.whl - Mend

RNApolis-0.4.4-py3-none-any.whl → RNApolis-0.4.7-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9)

{RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.4.4
+Version: 0.4.7
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
 rnapolis/annotator.py,sha256=7U3f0gchKdIGc6FwJx0UAc_95HJI5SgECj-b7-1yBhc,22086
 rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
-rnapolis/common.py,sha256=PUYF01P2vevhyImhZjGYE0jJlsxWHX6GQmsxI4W7S-E,30255
+rnapolis/common.py,sha256=NWhlPwT521jCSWcDcm_TNoYENjoZWpllf9sS-WuTEmA,30361
 rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
 rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
 rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
-rnapolis/parser.py,sha256=wCA9rXqt51iLECgeBqOShFpuT8JwanNkHYD5uXYvLzU,13988
+rnapolis/parser.py,sha256=2pQYy0sh8TCpeluMmmSJ7C5dudK_bsfstTWCdpwwpNU,15193
 rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
-rnapolis/tertiary.py,sha256=SQyiYWA0RJhAK70f88CKZvS4EzGKHQ2RoL1s4MueEDQ,21657
+rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
 rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-RNApolis-0.4.4.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
-RNApolis-0.4.4.dist-info/METADATA,sha256=irtWJbeg1LWun2r3WtnsnDDSHlLvru0hO9wz1e67cIE,54322
-RNApolis-0.4.4.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
-RNApolis-0.4.4.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
-RNApolis-0.4.4.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
-RNApolis-0.4.4.dist-info/RECORD,,
+RNApolis-0.4.7.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+RNApolis-0.4.7.dist-info/METADATA,sha256=551L8oU_7CdBw7v0jezfHQX7YzF9Fo83E6NVbLVfA50,54322
+RNApolis-0.4.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+RNApolis-0.4.7.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
+RNApolis-0.4.7.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+RNApolis-0.4.7.dist-info/RECORD,,

{RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (74.1.2)
+Generator: setuptools (75.6.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

rnapolis/common.py CHANGED Viewed

@@ -338,6 +338,9 @@ class Entry(Sequence):
             return self.pair
         raise IndexError()
+    def __lt__(self, other):
+        return self.index_ < other.index_
     def __len__(self) -> int:
         return 3
@@ -838,7 +841,7 @@ class BpSeq:
         for i in range(1, len(regions)):
             k, l, _ = regions[i]
-            available = [True for i in range(10)]
+            available = [True for _ in range(len("([{<" + string.ascii_uppercase))]
             for j in range(i):
                 m, n, _ = regions[j]

rnapolis/parser.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import logging
 from typing import IO, Dict, List, Optional, Tuple, Union
+import numpy as np
 from mmcif.io.IoAdapterPy import IoAdapterPy
+from scipy.spatial import KDTree
 from rnapolis.common import ResidueAuth, ResidueLabel
 from rnapolis.tertiary import BASE_ATOMS, Atom, Residue3D, Structure3D
@@ -53,10 +56,10 @@ def parse_cif(
     io_adapter = IoAdapterPy()
     data = io_adapter.readFile(cif.name)
-    atoms: List[Atom] = []
+    atoms_to_process: List[Atom] = []
     modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
-    sequence_by_entity = {}
-    is_nucleic_acid_by_entity = {}
+    sequence_by_entity: Dict[str, str] = {}
+    is_nucleic_acid_by_entity: Dict[str, bool] = {}
     if data:
         atom_site = data[0].getObj("atom_site")
@@ -136,7 +139,7 @@ def parse_cif(
                     else None
                 )
-                atoms.append(
+                atoms_to_process.append(
                     Atom(
                         label_entity_id,
                         label,
@@ -216,6 +219,7 @@ def parse_cif(
                 if entity_id and pdbx_seq_one_letter_code_can:
                     sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
+    atoms = filter_clashing_atoms(atoms_to_process)
     return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
@@ -228,7 +232,7 @@ def parse_pdb(
     Dict[str, bool],
 ]:
     pdb.seek(0)
-    atoms: List[Atom] = []
+    atoms_to_process: List[Atom] = []
     modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
     model = 1
@@ -236,9 +240,6 @@ def parse_pdb(
         if line.startswith("MODEL"):
             model = int(line[10:14].strip())
         elif line.startswith("ATOM") or line.startswith("HETATM"):
-            alternate_location = line[16]
-            if alternate_location != " ":
-                continue
             atom_name = line[12:16].strip()
             residue_name = line[17:20].strip()
             chain_identifier = line[21]
@@ -251,7 +252,10 @@ def parse_pdb(
             auth = ResidueAuth(
                 chain_identifier, residue_number, insertion_code, residue_name
             )
-            atoms.append(Atom(None, None, auth, model, atom_name, x, y, z, occupancy))
+            atoms_to_process.append(
+                Atom(None, None, auth, model, atom_name, x, y, z, occupancy)
+            )
         elif line.startswith("MODRES"):
             original_name = line[12:15]
             chain_identifier = line[16]
@@ -263,6 +267,7 @@ def parse_pdb(
             )
             modified[auth] = standard_residue_name
+    atoms = filter_clashing_atoms(atoms_to_process)
     return atoms, modified, {}, {}
@@ -392,3 +397,36 @@ def try_parse_int(s: str) -> Optional[int]:
         return int(s)
     except ValueError:
         return None
+def filter_clashing_atoms(atoms: List[Atom], clash_distance: float = 0.5) -> List[Atom]:
+    # First, remove duplicate atoms
+    unique_atoms = {}
+    for i, atom in enumerate(atoms):
+        key = (atom.label, atom.auth, atom.name)
+        if key not in unique_atoms or atom.occupancy > unique_atoms[key].occupancy:
+            unique_atoms[key] = atom
+    unique_atoms_list = list(unique_atoms.values())
+    # Now handle clashing atoms
+    coords = np.array([(atom.x, atom.y, atom.z) for atom in unique_atoms_list])
+    tree = KDTree(coords)
+    pairs = tree.query_pairs(r=clash_distance)
+    atoms_to_keep = set(range(len(unique_atoms_list)))
+    for i, j in pairs:
+        if (
+            unique_atoms_list[i].occupancy is None
+            or unique_atoms_list[j].occupancy is None
+        ):
+            continue
+        if unique_atoms_list[i].occupancy > unique_atoms_list[j].occupancy:
+            atoms_to_keep.discard(j)
+        else:
+            atoms_to_keep.discard(i)
+    return [unique_atoms_list[i] for i in atoms_to_keep]

rnapolis/tertiary.py CHANGED Viewed

@@ -124,36 +124,17 @@ class Residue3D(Residue):
     outermost_atoms = {"A": "N9", "G": "N9", "C": "N1", "U": "N1", "T": "N1"}
     # Dist representing expected name of atom closest to the tetrad center
     innermost_atoms = {"A": "N6", "G": "O6", "C": "N4", "U": "O4", "T": "O4"}
+    # Heavy atoms in phosphate and ribose
+    phosphate_atoms = {"P", "OP1", "OP2", "O3'", "O5'"}
+    sugar_atoms = {"C1'", "C2'", "C3'", "C4'", "C5'", "O4'"}
     # Heavy atoms for each main nucleobase
     nucleobase_heavy_atoms = {
         "A": set(["N1", "C2", "N3", "C4", "C5", "C6", "N6", "N7", "C8", "N9"]),
         "G": set(["N1", "C2", "N2", "N3", "C4", "C5", "C6", "O6", "N7", "C8", "N9"]),
         "C": set(["N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"]),
         "U": set(["N1", "C2", "O2", "N3", "C4", "O4", "C5", "C6"]),
+        "T": set(["N1", "C2", "O2", "N3", "C4", "O4", "C5", "C5M", "C6"]),
     }
-    # Heavy atoms in nucleotide
-    nucleotide_heavy_atoms = (
-        set(
-            [
-                "P",
-                "OP1",
-                "OP2",
-                "O5'",
-                "C5'",
-                "C4'",
-                "O4'",
-                "C3'",
-                "O3'",
-                "C2'",
-                "O2'",
-                "C1'",
-            ]
-        )
-        .union(nucleobase_heavy_atoms["A"])
-        .union(nucleobase_heavy_atoms["G"])
-        .union(nucleobase_heavy_atoms["C"])
-        .union(nucleobase_heavy_atoms["U"])
-    )
     def __lt__(self, other):
         return (self.model, self.chain, self.number, self.icode or " ") < (
@@ -202,9 +183,59 @@ class Residue3D(Residue):
     @cached_property
     def is_nucleotide(self) -> bool:
-        return self.nucleotide_heavy_atoms.intersection(
-            set([atom.name for atom in self.atoms])
+        scores = {"phosphate": 0.0, "sugar": 0.0, "base": 0.0, "connections": 0.0}
+        weights = {"phosphate": 0.25, "sugar": 0.25, "base": 0.25, "connections": 0.25}
+        residue_atoms = {atom.name for atom in self.atoms}
+        phosphate_match = len(residue_atoms.intersection(self.phosphate_atoms))
+        scores["phosphate"] = phosphate_match / len(self.phosphate_atoms)
+        sugar_match = len(residue_atoms.intersection(self.sugar_atoms))
+        scores["sugar"] = sugar_match / len(self.sugar_atoms)
+        nucleobase_atoms = {
+            key: self.nucleobase_heavy_atoms[key] for key in self.nucleobase_heavy_atoms
+        }
+        matches = {
+            key: len(residue_atoms.intersection(nucleobase_atoms[key]))
+            / len(nucleobase_atoms[key])
+            for key in nucleobase_atoms
+        }
+        best_match = max(matches.items(), key=lambda x: x[1])
+        scores["base"] = best_match[1]
+        connection_score = 0.0
+        distance_threshold = 2.0
+        if "P" in residue_atoms and "O5'" in residue_atoms:
+            p_atom = next(atom for atom in self.atoms if atom.name == "P")
+            o5_atom = next(atom for atom in self.atoms if atom.name == "O5'")
+            if (
+                numpy.linalg.norm(p_atom.coordinates - o5_atom.coordinates)
+                <= distance_threshold
+            ):
+                connection_score += 0.5
+        if "C1'" in residue_atoms:
+            c1_atom = next(atom for atom in self.atoms if atom.name == "C1'")
+            for base_connection in ["N9", "N1"]:
+                if base_connection in residue_atoms:
+                    base_atom = next(
+                        atom for atom in self.atoms if atom.name == base_connection
+                    )
+                    if (
+                        numpy.linalg.norm(c1_atom.coordinates - base_atom.coordinates)
+                        <= distance_threshold
+                    ):
+                        connection_score += 0.5
+                        break
+        scores["connections"] = connection_score
+        probability = sum(
+            scores[component] * weights[component] for component in scores.keys()
         )
+        return probability > 0.5
     @cached_property
     def base_normal_vector(self) -> Optional[numpy.typing.NDArray[numpy.floating]]:
@@ -566,15 +597,14 @@ class Mapping2D3D:
         return self.__generate_bpseq(canonical)
     def __generate_bpseq(self, base_pairs):
+        nucleotides = list(filter(lambda r: r.is_nucleotide, self.structure3d.residues))
         result: Dict[int, List] = {}
         residue_map: Dict[Residue3D, int] = {}
         i = 1
-        for j, residue in enumerate(
-            filter(lambda r: r.is_nucleotide, self.structure3d.residues)
-        ):
+        for j, residue in enumerate(nucleotides):
             if self.find_gaps and j > 0:
-                previous = self.structure3d.residues[j - 1]
+                previous = nucleotides[j - 1]
                 if (
                     not previous.is_connected(residue)

{RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/LICENSE RENAMED Viewed

File without changes

{RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

RNApolis-0.4.4-py3-none-any.whl → RNApolis-0.4.7-py3-none-any.whl

RNApolis-0.4.4-py3-none-any.whl → RNApolis-0.4.7-py3-none-any.whl