PyPI - RNApolis - Versions diffs - 0.4.11__tar.gz → 0.4.13__tar.gz - Mend

RNApolis 0.4.11.tar.gz → 0.4.13.tar.gz

Files changed (33) hide show

{rnapolis-0.4.11/src/RNApolis.egg-info → rnapolis-0.4.13}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.4.11
+Version: 0.4.13
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.4.11 → rnapolis-0.4.13}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("README.md") as f:
 setup(
     name="RNApolis",
-    version="0.4.11",
+    version="0.4.13",
     packages=["rnapolis"],
     package_dir={"": "src"},
     author="Tomasz Zok",

{rnapolis-0.4.11 → rnapolis-0.4.13/src/RNApolis.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.4.11
+Version: 0.4.13
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.4.11 → rnapolis-0.4.13}/src/RNApolis.egg-info/SOURCES.txt RENAMED Viewed

@@ -23,6 +23,7 @@ tests/test_annotator.py
 tests/test_bugfixes.py
 tests/test_common.py
 tests/test_metareader.py
+tests/test_molecule_filter.py
 tests/test_parser.py
 tests/test_quadruplexes.py
 tests/test_rfam_folder.py

{rnapolis-0.4.11 → rnapolis-0.4.13}/src/rnapolis/molecule_filter.py RENAMED Viewed

@@ -1,10 +1,11 @@
 #! /usr/bin/env python
 import argparse
 import tempfile
-from typing import List, Set, Tuple
+from typing import Iterable, List, Set, Tuple
 from mmcif.io.IoAdapterPy import IoAdapterPy
 from mmcif.io.PdbxReader import DataCategory, DataContainer
 from rnapolis.util import handle_input_file
 # Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
@@ -20,6 +21,7 @@ ENTITY_POLY_TYPES = [
 ]
 CATEGORIES_WITH_ENTITY_ID = [
+    ("entity", "id"),
     ("atom_site", "label_entity_id"),
     ("entity_keywords", "entity_id"),
     ("entity_name_com", "entity_id"),
@@ -153,38 +155,63 @@ def select_category_by_id(
     return attributes, rows
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--type",
-        help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
-        action="append",
-        default=["polyribonucleotide"],
-        choices=ENTITY_POLY_TYPES,
+def filter_by_poly_types(
+    file_content: str, entity_poly_types: Iterable[str] = ["polyribonucleotide"]
+) -> str:
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    entity_ids = select_ids(
+        data, "entity_poly", "type", "entity_id", set(entity_poly_types)
     )
-    parser.add_argument(
-        "--chain",
-        help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
-        action="append",
-        default=[],
+    asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
+    auth_asym_ids = select_ids(
+        data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
     )
-    parser.add_argument("path", help="path to a PDBx/mmCIF file")
-    args = parser.parse_args()
-    file = handle_input_file(args.path)
-    adapter = IoAdapterPy()
-    data = adapter.readFile(file.name)
     output = DataContainer("rnapolis")
-    if args.chain:
-        entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(args.chain))
-        asym_ids = set(args.chain)
-    else:
-        entity_ids = select_ids(
-            data, "entity_poly", "type", "entity_id", set(args.type)
-        )
-        asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
+    for table, ids in (
+        (CATEGORIES_WITH_ENTITY_ID, entity_ids),
+        (CATEGORIES_WITH_ASYM_ID, asym_ids),
+        (CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
+    ):
+        for category, field_name in table:
+            attributes, rows = select_category_by_id(data, category, field_name, ids)
+            if attributes and rows:
+                obj = DataCategory(category, attributes, rows)
+                output.append(obj)
+    with tempfile.NamedTemporaryFile("rt+") as tmp:
+        adapter.writeFile(tmp.name, [output])
+        tmp.seek(0)
+        return tmp.read()
+def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
+    """
+    Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
+    Warning! The new file might contain more chains than provided in the `chains` argument.
+    This is because the function filters by entity, so if you ask for chain "A",
+    which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
+    """
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    output = DataContainer("rnapolis")
+    entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
+    asym_ids = set(chains)
     auth_asym_ids = select_ids(
         data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
     )
@@ -201,9 +228,38 @@ def main():
                 obj = DataCategory(category, attributes, rows)
                 output.append(obj)
-    with tempfile.NamedTemporaryFile() as tmp:
+    with tempfile.NamedTemporaryFile("rt+") as tmp:
         adapter.writeFile(tmp.name, [output])
-        print(tmp.read().decode())
+        tmp.seek(0)
+        return tmp.read()
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--type",
+        help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
+        action="append",
+        default=["polyribonucleotide"],
+        choices=ENTITY_POLY_TYPES,
+    )
+    parser.add_argument(
+        "--chain",
+        help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
+        action="append",
+        default=[],
+    )
+    parser.add_argument("path", help="path to a PDBx/mmCIF file")
+    args = parser.parse_args()
+    file = handle_input_file(args.path)
+    if args.chain:
+        print(filter_by_chains(file.read(), args.chain))
+    elif args.type:
+        print(filter_by_poly_types(file.read(), args.type))
+    else:
+        parser.print_help()
 if __name__ == "__main__":

rnapolis-0.4.13/tests/test_molecule_filter.py ADDED Viewed

@@ -0,0 +1,37 @@
+import re
+import tempfile
+from rnapolis.molecule_filter import filter_by_chains, filter_by_poly_types
+from rnapolis.parser import parse_cif
+def test_filter_by_poly_types():
+    with open("tests/1a9n.cif") as f:
+        content = f.read()
+    filtered = filter_by_poly_types(content, ["polyribonucleotide"])
+    assert re.search(r"^_entity.id", filtered, re.MULTILINE) is not None
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(filtered)
+        f.seek(0)
+        atoms, _, _, _ = parse_cif(f)
+    chains = set([atom.label.chain for atom in atoms if atom.label])
+    assert chains == {"A", "B"}
+def test_filter_by_chains():
+    with open("tests/1a9n.cif") as f:
+        content = f.read()
+    filtered = filter_by_chains(content, ["A", "C"])
+    assert re.search(r"^_entity.id", filtered, re.MULTILINE) is not None
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(filtered)
+        f.seek(0)
+        atoms, _, _, _ = parse_cif(f)
+    chains = set([atom.label.chain for atom in atoms if atom.label])
+    assert chains >= {"A", "C"}