PyPI - RNApolis - Versions diffs - 0.4.13.tar.gz → 0.4.15.tar.gz - Mend

RNApolis 0.4.13.tar.gz → 0.4.15.tar.gz

Files changed (33) hide show

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: RNApolis
-Version: 0.4.13
+Version: 0.4.15
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok
@@ -26,6 +26,15 @@ Requires-Dist: pulp
 Requires-Dist: requests
 Requires-Dist: scipy
 Requires-Dist: viennarna
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: project-url
+Dynamic: requires-dist
+Dynamic: summary
 # RNApolis

{rnapolis-0.4.13 → rnapolis-0.4.15}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("README.md") as f:
 setup(
     name="RNApolis",
-    version="0.4.13",
+    version="0.4.15",
     packages=["rnapolis"],
     package_dir={"": "src"},
     author="Tomasz Zok",

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: RNApolis
-Version: 0.4.13
+Version: 0.4.15
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok
@@ -26,6 +26,15 @@ Requires-Dist: pulp
 Requires-Dist: requests
 Requires-Dist: scipy
 Requires-Dist: viennarna
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: project-url
+Dynamic: requires-dist
+Dynamic: summary
 # RNApolis

@@ -11,6 +11,8 @@ import numpy
 import numpy.typing
 import orjson
 from ordered_set import OrderedSet
+from scipy.spatial import KDTree
 from rnapolis.common import (
     BR,
     BaseInteractions,
@@ -42,7 +44,6 @@ from rnapolis.tertiary import (
     torsion_angle,
 )
 from rnapolis.util import handle_input_file
-from scipy.spatial import KDTree
 HYDROGEN_BOND_MAX_DISTANCE = 4.0
 HYDROGEN_BOND_ANGLE_RANGE = (50.0, 130.0)  # 90 degrees is ideal, so allow +- 40 degrees

@@ -156,7 +156,9 @@ def select_category_by_id(
 def filter_by_poly_types(
-    file_content: str, entity_poly_types: Iterable[str] = ["polyribonucleotide"]
+    file_content: str,
+    entity_poly_types: Iterable[str] = ["polyribonucleotide"],
+    retain_categories: Iterable[str] = [],
 ) -> str:
     adapter = IoAdapterPy()
@@ -187,13 +189,20 @@ def filter_by_poly_types(
                 obj = DataCategory(category, attributes, rows)
                 output.append(obj)
+    for category in retain_categories:
+        obj = data[0].getObj(category)
+        if obj:
+            output.append(obj)
     with tempfile.NamedTemporaryFile("rt+") as tmp:
         adapter.writeFile(tmp.name, [output])
         tmp.seek(0)
         return tmp.read()
-def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
+def filter_by_chains(
+    file_content: str, chains: Iterable[str], retain_categories: Iterable[str] = []
+) -> str:
     """
     Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
@@ -228,6 +237,11 @@ def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
                 obj = DataCategory(category, attributes, rows)
                 output.append(obj)
+    for category in retain_categories:
+        obj = data[0].getObj(category)
+        if obj:
+            output.append(obj)
     with tempfile.NamedTemporaryFile("rt+") as tmp:
         adapter.writeFile(tmp.name, [output])
         tmp.seek(0)

@@ -218,7 +218,9 @@ def parse_cif(
                     )
                 if entity_id and pdbx_seq_one_letter_code_can:
-                    sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
+                    sequence_by_entity[entity_id] = (
+                        pdbx_seq_one_letter_code_can.replace("\n", "")
+                    )
         if entity:
             for row in entity.getRowList():
@@ -234,7 +236,16 @@ def parse_cif(
                     if type_:
                         is_nucleic_acid_by_entity[entity_id] = (
-                            is_nucleic_acid_by_entity.get(entity_id, type_)
+                            is_nucleic_acid_by_entity.get(
+                                entity_id,
+                                type_
+                                in (
+                                    "peptide nucleic acid",
+                                    "polydeoxyribonucleotide",
+                                    "polydeoxyribonucleotide/polyribonucleotide hybrid",
+                                    "polyribonucleotide",
+                                ),
+                            )
                         )
     atoms = filter_clashing_atoms(atoms_to_process)

@@ -9,8 +9,9 @@ def test_filter_by_poly_types():
     with open("tests/1a9n.cif") as f:
         content = f.read()
-    filtered = filter_by_poly_types(content, ["polyribonucleotide"])
-    assert re.search(r"^_entity.id", filtered, re.MULTILINE) is not None
+    filtered = filter_by_poly_types(content, ["polyribonucleotide"], ["chem_comp"])
+    assert re.search(r"^_entity\.id", filtered, re.MULTILINE) is not None
+    assert re.search(r"^_chem_comp\.id", filtered, re.MULTILINE) is not None
     with tempfile.NamedTemporaryFile("rt+") as f:
         f.write(filtered)
@@ -25,8 +26,9 @@ def test_filter_by_chains():
     with open("tests/1a9n.cif") as f:
         content = f.read()
-    filtered = filter_by_chains(content, ["A", "C"])
-    assert re.search(r"^_entity.id", filtered, re.MULTILINE) is not None
+    filtered = filter_by_chains(content, ["A", "C"], ["chem_comp"])
+    assert re.search(r"^_entity\.id", filtered, re.MULTILINE) is not None
+    assert re.search(r"^_chem_comp\.id", filtered, re.MULTILINE) is not None
     with tempfile.NamedTemporaryFile("rt+") as f:
         f.write(filtered)

@@ -1,3 +1,5 @@
+import gzip
 from rnapolis.parser import read_3d_structure
@@ -31,3 +33,20 @@ def test_4qln_no_duplicate_atoms():
             assert len(atom_names) == len(
                 set(atom_names)
             ), f"Duplicate atoms found in residue {residue.auth}"
+def test_1gid():
+    expected_sequence = "GAAUUGCGGGAAAGGGGUCAACAGCCGUUCAGUACCAAGUCUCAGGGGAAACUUUGAGAUGGCCUUGCAAAGGGUAUGGUAAUAAGCUGACGGACAUGGUCCUAACCACGCAGCCAAGUCCUAAGUCAACAGAUCUUCUGUUGAUAUGGAUGCAGUUC"
+    with gzip.open("tests/1gid.cif.gz", "rt") as f:
+        structure3d = read_3d_structure(f, nucleic_acid_only=True)
+    residues_a = [r for r in structure3d.residues if r.auth.chain == "A"]
+    residues_b = [r for r in structure3d.residues if r.auth.chain == "B"]
+    assert len(residues_a) == len(expected_sequence)
+    assert len(residues_b) == len(expected_sequence)
+    actual_sequence_a = "".join([residue.one_letter_name for residue in residues_a])
+    actual_sequence_b = "".join([residue.one_letter_name for residue in residues_b])
+    assert actual_sequence_a == expected_sequence
+    assert actual_sequence_b == expected_sequence