PyPI - RNApolis - Versions diffs - 0.4.9__tar.gz → 0.4.11__tar.gz - Mend

RNApolis 0.4.9tar.gz → 0.4.11tar.gz

Files changed (33) hide show

{rnapolis-0.4.9/src/RNApolis.egg-info → rnapolis-0.4.11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.4.9
+Version: 0.4.11
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.4.9 → rnapolis-0.4.11}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("README.md") as f:
 setup(
     name="RNApolis",
-    version="0.4.9",
+    version="0.4.11",
     packages=["rnapolis"],
     package_dir={"": "src"},
     author="Tomasz Zok",

{rnapolis-0.4.9 → rnapolis-0.4.11/src/RNApolis.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.4.9
+Version: 0.4.11
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.4.9 → rnapolis-0.4.11}/src/RNApolis.egg-info/SOURCES.txt RENAMED Viewed

@@ -26,4 +26,5 @@ tests/test_metareader.py
 tests/test_parser.py
 tests/test_quadruplexes.py
 tests/test_rfam_folder.py
-tests/test_tertiary.py
+tests/test_tertiary.py
+tests/test_transformer.py

{rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/common.py RENAMED Viewed

@@ -940,6 +940,27 @@ class BpSeq:
             solutions.add(self.__make_dot_bracket(regions, orders))
         return list(solutions)
+    def without_pseudoknots(self):
+        """Return a BpSeq rebuilt from this object's dot-bracket with pseudoknots removed."""
+        flattened = self.dot_bracket.without_pseudoknots()
+        return BpSeq.from_dotbracket(flattened)
+    def without_isolated(self):
+        """Return a BpSeq in which every isolated base pair is unpaired.
+
+        A stem whose 5' strand begins and ends at the same position (a stem
+        made of a single base pair) is treated as isolated. When no such stem
+        exists, ``self`` is returned as is. Otherwise a new BpSeq is built and
+        the entries of this object are left untouched.
+        """
+        import copy  # local import: only this method needs it
+        stems, _, _, _ = self.elements
+        to_unpair = []
+        for stem in stems:
+            if stem.strand5p.first == stem.strand5p.last:
+                # strand positions become indices into entries after subtracting 1
+                to_unpair.append(stem.strand5p.first - 1)
+                to_unpair.append(stem.strand3p.first - 1)
+        if not to_unpair:
+            return self
+        entries = self.entries.copy()
+        for i in to_unpair:
+            # The copy above is shallow, so clone the entry before resetting
+            # its pair; otherwise the change would leak into self.entries.
+            unpaired = copy.copy(entries[i])
+            unpaired.pair = 0
+            entries[i] = unpaired
+        return BpSeq(entries)
 @dataclass
 class DotBracket:
@@ -990,6 +1011,10 @@ class DotBracket:
     def __hash__(self) -> int:
         return hash((self.sequence, self.structure))
+    def without_pseudoknots(self):
+        """Return a DotBracket with the same sequence and a flattened structure.
+
+        Every square, curly or angle bracket and every ASCII letter in the
+        structure is replaced by a dot; all other characters are kept.
+        """
+        higher_order = re.compile(r"[\[\]\{\}\<\>A-Za-z]")
+        flattened = higher_order.sub(".", self.structure)
+        return DotBracket(self.sequence, flattened)
 @dataclass
 class MultiStrandDotBracket(DotBracket):

{rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/motif_extractor.py RENAMED Viewed

@@ -9,6 +9,12 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--dbn", help="path to DotBracket file")
     parser.add_argument("--bpseq", help="path to BpSeq file")
+    parser.add_argument(
+        "--remove-pseudoknots", action="store_true", help="remove pseudoknots"
+    )
+    parser.add_argument(
+        "--remove-isolated", action="store_true", help="remove isolated base pairs"
+    )
     args = parser.parse_args()
     if args.dbn:
@@ -19,6 +25,12 @@ def main():
         parser.print_help()
         return
+    if args.remove_isolated:
+        bpseq = bpseq.without_isolated()
+    if args.remove_pseudoknots:
+        bpseq = bpseq.without_pseudoknots()
     print(f"Full dot-bracket:\n{bpseq.dot_bracket}")
     stems, single_strands, hairpins, loops = bpseq.elements

{rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/parser.py RENAMED Viewed

@@ -65,6 +65,7 @@ def parse_cif(
         atom_site = data[0].getObj("atom_site")
         mod_residue = data[0].getObj("pdbx_struct_mod_residue")
         entity_poly = data[0].getObj("entity_poly")
+        entity = data[0].getObj("entity")
         if atom_site:
             for row in atom_site.getRowList():
@@ -219,6 +220,23 @@ def parse_cif(
                 if entity_id and pdbx_seq_one_letter_code_can:
                     sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
+        if entity:
+            for row in entity.getRowList():
+                row_dict = dict(zip(entity.getAttributeList(), row))
+                entity_id = row_dict.get("id", None)
+                type_ = row_dict.get("type", None)
+                if entity_id:
+                    sequence_by_entity[entity_id] = sequence_by_entity.get(
+                        entity_id, ""
+                    )
+                    if type_:
+                        is_nucleic_acid_by_entity[entity_id] = (
+                            is_nucleic_acid_by_entity.get(entity_id, type_)
+                        )
     atoms = filter_clashing_atoms(atoms_to_process)
     return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity

rnapolis-0.4.11/src/rnapolis/transformer.py ADDED Viewed

@@ -0,0 +1,134 @@
+#! /usr/bin/env python
+import argparse
+import string
+import tempfile
+from typing import Dict, Tuple
+from mmcif.io.IoAdapterPy import IoAdapterPy
+from mmcif.io.PdbxReader import DataCategory
+def copy_from_to(
+    file_content: str,
+    category: str = "atom_site",
+    copy_from: str = "label_asym_id",
+    copy_to: str = "auth_asym_id",
+) -> str:
+    """Copy the values of one data item onto another within an mmCIF category.
+
+    Args:
+        file_content: text of an mmCIF file.
+        category: name of the category to work on.
+        copy_from: name of the data item whose values are read.
+        copy_to: name of the data item that receives the values; it is
+            appended to the category when it does not exist yet.
+
+    Returns:
+        The transformed mmCIF text. The input text is returned unchanged when
+        the file has no data blocks, the category is missing from the first
+        block, or ``copy_from`` is not an attribute of the category.
+    """
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile(mode="wt") as f:
+        f.write(file_content)
+        # flush so the reader, which opens the file by name, sees the content
+        f.flush()
+        data = adapter.readFile(f.name)
+    if not data or category not in data[0].getObjNameList():
+        return file_content
+    category_obj = data[0].getObj(category)
+    attributes = category_obj.getAttributeList()
+    if copy_from not in attributes:
+        return file_content
+    if copy_to not in attributes:
+        attributes.append(copy_to)
+    # column positions are the same for every row, so look them up once
+    i = attributes.index(copy_from)
+    j = attributes.index(copy_to)
+    transformed = []
+    for row in category_obj.getRowList():
+        if j >= len(row):
+            # the target column was just added, so the row is one cell short
+            row.append(row[i])
+        else:
+            row[j] = row[i]
+        transformed.append(row)
+    # DataCategory expects the category name (not the category object) first
+    data[0].replace(DataCategory(category, attributes, transformed))
+    with tempfile.NamedTemporaryFile(mode="rt+") as f:
+        adapter.writeFile(f.name, data)
+        f.seek(0)
+        return f.read()
+def replace_value(
+    file_content: str,
+    category: str = "atom_site",
+    column: str = "auth_asym_id",
+    values: str = "".join([c for c in string.printable if c not in string.whitespace]),
+) -> Tuple[str, Dict]:
+    """Replace each distinct value of an mmCIF data item with a single character.
+
+    Distinct values are numbered in order of first appearance in the rows and
+    the n-th distinct value is replaced by ``values[n]``.
+
+    Args:
+        file_content: text of an mmCIF file.
+        category: name of the category to work on.
+        column: name of the data item whose values are replaced.
+        values: replacement characters, consumed from left to right.
+
+    Returns:
+        A tuple of the transformed mmCIF text and the mapping from original
+        to replacement values. ``(file_content, {})`` is returned when the
+        file has no data blocks, the category is missing from the first
+        block, or ``column`` is not an attribute of the category.
+
+    Raises:
+        IndexError: when the column holds more distinct values than there
+            are characters in ``values``.
+    """
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile(mode="wt") as f:
+        f.write(file_content)
+        # flush so the reader, which opens the file by name, sees the content
+        f.flush()
+        data = adapter.readFile(f.name)
+    if not data or category not in data[0].getObjNameList():
+        return file_content, {}
+    category_obj = data[0].getObj(category)
+    attributes = category_obj.getAttributeList()
+    if column not in attributes:
+        return file_content, {}
+    # the column position is the same for every row, so look it up once
+    i = attributes.index(column)
+    transformed = []
+    mapping = {}
+    for row in category_obj.getRowList():
+        original = row[i]
+        if original not in mapping:
+            if len(mapping) >= len(values):
+                raise IndexError(
+                    f"not enough replacement values: {column} in {category} "
+                    f"has more than {len(values)} distinct values"
+                )
+            mapping[original] = values[len(mapping)]
+        row[i] = mapping[original]
+        transformed.append(row)
+    # DataCategory expects the category name (not the category object) first
+    data[0].replace(DataCategory(category, attributes, transformed))
+    with tempfile.NamedTemporaryFile(mode="rt+") as f:
+        adapter.writeFile(f.name, data)
+        f.seek(0)
+        return f.read(), mapping
+def main():
+    """Command-line entry point: transform an mmCIF file and write the result.
+
+    With both --copy-from and --copy-to the values of one data item are
+    copied onto another; otherwise, with both --replace and --values, the
+    values of one data item are replaced. When neither pair is complete the
+    help text is printed and nothing is written.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input", help="path to input mmCIF file")
+    parser.add_argument("output", help="path to output mmCIF file")
+    parser.add_argument(
+        "--category",
+        # same default as copy_from_to() and replace_value(); a None category
+        # would make both of them return their input unchanged
+        default="atom_site",
+        help="name of the category to work on, e.g., atom_site",
+    )
+    parser.add_argument(
+        "--copy-from",
+        help="name of a data item to copy from, e.g., label_asym_id (exclusive with --replace)",
+    )
+    parser.add_argument(
+        "--copy-to",
+        help="name of a data item to copy to, e.g., auth_asym_id (exclusive with --replace)",
+    )
+    parser.add_argument(
+        "--replace",
+        help="name of a data item to replace values, e.g., auth_asym_id (exclusive with --copy-from and --copy-to)",
+    )
+    parser.add_argument(
+        "--values",
+        help="values to replace with, e.g., ABCDEFGHIJKLMNOPQRSTUVWXYZ (exclusive with --copy-from and --copy-to)",
+    )
+    args = parser.parse_args()
+    copy_mode = bool(args.copy_from and args.copy_to)
+    replace_mode = bool(args.replace and args.values)
+    if not (copy_mode or replace_mode):
+        parser.print_help()
+        return
+    # both transformations take the file *content*, not its path
+    with open(args.input) as f:
+        file_content = f.read()
+    if copy_mode:
+        output = copy_from_to(
+            file_content, args.category, args.copy_from, args.copy_to
+        )
+    else:
+        # replace_value() returns (content, mapping); only the content is written
+        output, _ = replace_value(
+            file_content, args.category, args.replace, args.values
+        )
+    with open(args.output, "w") as f:
+        f.write(output)
+if __name__ == "__main__":
+    main()

{rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_common.py RENAMED Viewed

@@ -199,3 +199,37 @@ def test_high_level_pseudoknot():
         dot_bracket.structure
         == "([{<" + string.ascii_uppercase + ")]}>" + string.ascii_lowercase
     )
+def test_bpseq_removal_options():
+    """Check BpSeq.without_isolated, without_pseudoknots and their combination."""
+    sequence = "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCCAUCCACAGAAUUCGCACCA"
+    structure = "(((((((..((((....[[..)))).((((..(...)..)))).....(((((..]]...))))))))))))...."
+    expected_no_isolated = "(((((((..((((....[[..)))).((((.........)))).....(((((..]]...))))))))))))...."
+    expected_no_pseudoknots = "(((((((..((((........)))).((((..(...)..)))).....(((((.......))))))))))))...."
+    expected_neither = "(((((((..((((........)))).((((.........)))).....(((((.......))))))))))))...."
+    bpseq = BpSeq.from_dotbracket(DotBracket(sequence, structure))
+    # the round trip through BpSeq must reproduce the input
+    assert bpseq.dot_bracket.sequence == sequence
+    assert bpseq.dot_bracket.structure == structure
+    no_isolated = bpseq.without_isolated()
+    assert no_isolated.dot_bracket.sequence == sequence
+    assert no_isolated.dot_bracket.structure == expected_no_isolated
+    no_pseudoknots = bpseq.without_pseudoknots()
+    assert no_pseudoknots.dot_bracket.sequence == sequence
+    assert no_pseudoknots.dot_bracket.structure == expected_no_pseudoknots
+    neither = bpseq.without_isolated().without_pseudoknots()
+    assert neither.dot_bracket.sequence == sequence
+    assert neither.dot_bracket.structure == expected_neither

rnapolis-0.4.11/tests/test_transformer.py ADDED Viewed

@@ -0,0 +1,63 @@
+import tempfile
+from rnapolis.parser import parse_cif
+from rnapolis.transformer import copy_from_to, replace_value
+def test_replace_value():
+    """replace_value must rewrite auth_asym_id and leave label_asym_id alone."""
+    with open("tests/4gqj-assembly1.cif") as source:
+        content = source.read()
+    with tempfile.NamedTemporaryFile(mode="wt") as tmp:
+        tmp.write(content)
+        tmp.seek(0)
+        org_atoms, _, _, _ = parse_cif(tmp)
+    original_chains = {"A", "B", "A-2", "B-2"}
+    assert {atom.label.chain for atom in org_atoms if atom.label} == original_chains
+    assert {atom.auth.chain for atom in org_atoms if atom.auth} == original_chains
+    replaced_content, mapping = replace_value(
+        content, "atom_site", "auth_asym_id", "ABCD"
+    )
+    assert mapping == {"A": "A", "B": "B", "A-2": "C", "B-2": "D"}
+    with tempfile.NamedTemporaryFile(mode="rt+") as tmp:
+        tmp.write(replaced_content)
+        tmp.seek(0)
+        rep_atoms, _, _, _ = parse_cif(tmp)
+    # only the auth chain names are expected to change
+    assert {atom.label.chain for atom in rep_atoms if atom.label} == original_chains
+    assert {atom.auth.chain for atom in rep_atoms if atom.auth} == {"A", "B", "C", "D"}
+def test_copy_from_to():
+    """copy_from_to must overwrite auth_asym_id with the label_asym_id values."""
+    with open("tests/5it9.cif") as source:
+        content = source.read()
+    with tempfile.NamedTemporaryFile(mode="wt") as tmp:
+        tmp.write(content)
+        tmp.seek(0)
+        org_atoms, _, _, _ = parse_cif(tmp)
+    label_chains = {"HA", "IA"}
+    assert {atom.label.chain for atom in org_atoms if atom.label} == label_chains
+    assert {atom.auth.chain for atom in org_atoms if atom.auth} == {"2", "i"}
+    replaced_content = copy_from_to(
+        content, "atom_site", "label_asym_id", "auth_asym_id"
+    )
+    with tempfile.NamedTemporaryFile(mode="rt+") as tmp:
+        tmp.write(replaced_content)
+        tmp.seek(0)
+        rep_atoms, _, _, _ = parse_cif(tmp)
+    # after the copy both naming schemes must agree
+    assert {atom.label.chain for atom in rep_atoms if atom.label} == label_chains
+    assert {atom.auth.chain for atom in rep_atoms if atom.auth} == label_chains

rnapolis-0.4.9/src/rnapolis/transformer.py DELETED Viewed

@@ -1,65 +0,0 @@
-#! /usr/bin/env python
-import argparse
-import sys
-from mmcif.io.IoAdapterPy import IoAdapterPy
-from mmcif.io.PdbxReader import DataCategory
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("input", help="path to input mmCIF file")
-    parser.add_argument("output", help="path to output mmCIF file")
-    parser.add_argument(
-        "--category", help="name of the category to work on, e.g., atom_site"
-    )
-    parser.add_argument(
-        "--copy-from", help="name of a data item to copy from, e.g., label_asym_id"
-    )
-    parser.add_argument(
-        "--copy-to", help="name of a data item to copy to, e.g., auth_asym_id"
-    )
-    args = parser.parse_args()
-    adapter = IoAdapterPy()
-    data = adapter.readFile(args.input)
-    if len(data) == 0:
-        print("Empty mmCIF file", file=sys.stderr)
-        sys.exit(1)
-    if args.category not in data[0].getObjNameList():
-        print(f"Failed to find {args.category} in the mmCIF file", file=sys.stderr)
-        sys.exit(1)
-    category = data[0].getObj(args.category)
-    attributes = category.getAttributeList()
-    if args.copy_from not in attributes:
-        print(
-            f"Failed to find data item {args.copy_from} in {args.category}",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    transformed = []
-    if args.copy_to not in attributes:
-        attributes.append(args.copy_to)
-    for row in category.getRowList():
-        i = attributes.index(args.copy_from)
-        j = attributes.index(args.copy_to)
-        if j >= len(row):
-            row.append(row[i])
-        else:
-            row[j] = row[i]
-        transformed.append(row)
-    data[0].replace(DataCategory(args.category, attributes, transformed))
-    adapter.writeFile(args.output, data)
-if __name__ == "__main__":
-    main()