PyPI - RNApolis - Versions diffs - 0.4.10__tar.gz → 0.4.12__tar.gz - Mend

RNApolis 0.4.10 (tar.gz) → 0.4.12 (tar.gz)

Files changed (34)

{rnapolis-0.4.10/src/RNApolis.egg-info → rnapolis-0.4.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.4.10
+Version: 0.4.12
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.4.10 → rnapolis-0.4.12}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("README.md") as f:
 setup(
     name="RNApolis",
-    version="0.4.10",
+    version="0.4.12",
     packages=["rnapolis"],
     package_dir={"": "src"},
     author="Tomasz Zok",

{rnapolis-0.4.10 → rnapolis-0.4.12/src/RNApolis.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.4.10
+Version: 0.4.12
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.4.10 → rnapolis-0.4.12}/src/RNApolis.egg-info/SOURCES.txt RENAMED Viewed

@@ -23,7 +23,9 @@ tests/test_annotator.py
 tests/test_bugfixes.py
 tests/test_common.py
 tests/test_metareader.py
+tests/test_molecule_filter.py
 tests/test_parser.py
 tests/test_quadruplexes.py
 tests/test_rfam_folder.py
-tests/test_tertiary.py
+tests/test_tertiary.py
+tests/test_transformer.py

{rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/molecule_filter.py RENAMED Viewed

@@ -1,10 +1,11 @@
 #! /usr/bin/env python
 import argparse
 import tempfile
-from typing import List, Set, Tuple
+from typing import Iterable, List, Set, Tuple
 from mmcif.io.IoAdapterPy import IoAdapterPy
 from mmcif.io.PdbxReader import DataCategory, DataContainer
 from rnapolis.util import handle_input_file
 # Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
@@ -153,38 +154,63 @@ def select_category_by_id(
     return attributes, rows
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--type",
-        help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
-        action="append",
-        default=["polyribonucleotide"],
-        choices=ENTITY_POLY_TYPES,
+def filter_by_poly_types(
+    file_content: str, entity_poly_types: Iterable[str] = ["polyribonucleotide"]
+) -> str:
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    entity_ids = select_ids(
+        data, "entity_poly", "type", "entity_id", set(entity_poly_types)
     )
-    parser.add_argument(
-        "--chain",
-        help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
-        action="append",
-        default=[],
+    asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
+    auth_asym_ids = select_ids(
+        data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
     )
-    parser.add_argument("path", help="path to a PDBx/mmCIF file")
-    args = parser.parse_args()
-    file = handle_input_file(args.path)
-    adapter = IoAdapterPy()
-    data = adapter.readFile(file.name)
     output = DataContainer("rnapolis")
-    if args.chain:
-        entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(args.chain))
-        asym_ids = set(args.chain)
-    else:
-        entity_ids = select_ids(
-            data, "entity_poly", "type", "entity_id", set(args.type)
-        )
-        asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
+    for table, ids in (
+        (CATEGORIES_WITH_ENTITY_ID, entity_ids),
+        (CATEGORIES_WITH_ASYM_ID, asym_ids),
+        (CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
+    ):
+        for category, field_name in table:
+            attributes, rows = select_category_by_id(data, category, field_name, ids)
+            if attributes and rows:
+                obj = DataCategory(category, attributes, rows)
+                output.append(obj)
+    with tempfile.NamedTemporaryFile("rt+") as tmp:
+        adapter.writeFile(tmp.name, [output])
+        tmp.seek(0)
+        return tmp.read()
+def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
+    """
+    Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
+    Warning! The new file might contain more chains than provided in the `chains` argument.
+    This is because the function filters by entity, so if you ask for chain "A",
+    which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
+    """
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    output = DataContainer("rnapolis")
+    entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
+    asym_ids = set(chains)
     auth_asym_ids = select_ids(
         data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
     )
@@ -201,9 +227,38 @@ def main():
                 obj = DataCategory(category, attributes, rows)
                 output.append(obj)
-    with tempfile.NamedTemporaryFile() as tmp:
+    with tempfile.NamedTemporaryFile("rt+") as tmp:
         adapter.writeFile(tmp.name, [output])
-        print(tmp.read().decode())
+        tmp.seek(0)
+        return tmp.read()
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--type",
+        help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
+        action="append",
+        default=["polyribonucleotide"],
+        choices=ENTITY_POLY_TYPES,
+    )
+    parser.add_argument(
+        "--chain",
+        help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
+        action="append",
+        default=[],
+    )
+    parser.add_argument("path", help="path to a PDBx/mmCIF file")
+    args = parser.parse_args()
+    file = handle_input_file(args.path)
+    if args.chain:
+        print(filter_by_chains(file.read(), args.chain))
+    elif args.type:
+        print(filter_by_poly_types(file.read(), args.type))
+    else:
+        parser.print_help()
 if __name__ == "__main__":

{rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/parser.py RENAMED Viewed

@@ -65,6 +65,7 @@ def parse_cif(
         atom_site = data[0].getObj("atom_site")
         mod_residue = data[0].getObj("pdbx_struct_mod_residue")
         entity_poly = data[0].getObj("entity_poly")
+        entity = data[0].getObj("entity")
         if atom_site:
             for row in atom_site.getRowList():
@@ -219,6 +220,23 @@ def parse_cif(
                 if entity_id and pdbx_seq_one_letter_code_can:
                     sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
+        if entity:
+            for row in entity.getRowList():
+                row_dict = dict(zip(entity.getAttributeList(), row))
+                entity_id = row_dict.get("id", None)
+                type_ = row_dict.get("type", None)
+                if entity_id:
+                    sequence_by_entity[entity_id] = sequence_by_entity.get(
+                        entity_id, ""
+                    )
+                    if type_:
+                        is_nucleic_acid_by_entity[entity_id] = (
+                            is_nucleic_acid_by_entity.get(entity_id, type_)
+                        )
     atoms = filter_clashing_atoms(atoms_to_process)
     return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity

rnapolis-0.4.12/src/rnapolis/transformer.py ADDED Viewed

@@ -0,0 +1,134 @@
+#! /usr/bin/env python
+import argparse
+import string
+import tempfile
+from typing import Dict, Tuple
+from mmcif.io.IoAdapterPy import IoAdapterPy
+from mmcif.io.PdbxReader import DataCategory
+def copy_from_to(
+    file_content: str,
+    category: str = "atom_site",
+    copy_from: str = "label_asym_id",
+    copy_to: str = "auth_asym_id",
+) -> str:
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile(mode="wt") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    if len(data) == 0 or category not in data[0].getObjNameList():
+        return file_content
+    category_obj = data[0].getObj(category)
+    attributes = category_obj.getAttributeList()
+    if copy_from not in attributes:
+        return file_content
+    transformed = []
+    if copy_to not in attributes:
+        attributes.append(copy_to)
+    for row in category_obj.getRowList():
+        i = attributes.index(copy_from)
+        j = attributes.index(copy_to)
+        if j >= len(row):
+            row.append(row[i])
+        else:
+            row[j] = row[i]
+        transformed.append(row)
+    data[0].replace(DataCategory(category_obj, attributes, transformed))
+    with tempfile.NamedTemporaryFile(mode="rt+") as f:
+        adapter.writeFile(f.name, data)
+        f.seek(0)
+        return f.read()
+def replace_value(
+    file_content: str,
+    category: str = "atom_site",
+    column: str = "auth_asym_id",
+    values: str = "".join([c for c in string.printable if c not in string.whitespace]),
+) -> Tuple[str, Dict]:
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile(mode="wt") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    if len(data) == 0 or category not in data[0].getObjNameList():
+        return file_content, {}
+    category_obj = data[0].getObj(category)
+    attributes = category_obj.getAttributeList()
+    if column not in attributes:
+        return file_content, {}
+    transformed = []
+    mapping = {}
+    for row in category_obj.getRowList():
+        i = attributes.index(column)
+        if row[i] not in mapping:
+            mapping[row[i]] = values[len(mapping)]
+        row[i] = mapping[row[i]]
+        transformed.append(row)
+    data[0].replace(DataCategory(category_obj, attributes, transformed))
+    with tempfile.NamedTemporaryFile(mode="rt+") as f:
+        adapter.writeFile(f.name, data)
+        f.seek(0)
+        return f.read(), mapping
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input", help="path to input mmCIF file")
+    parser.add_argument("output", help="path to output mmCIF file")
+    parser.add_argument(
+        "--category", help="name of the category to work on, e.g., atom_site"
+    )
+    parser.add_argument(
+        "--copy-from",
+        help="name of a data item to copy from, e.g., label_asym_id (exclusive with --replace)",
+    )
+    parser.add_argument(
+        "--copy-to",
+        help="name of a data item to copy to, e.g., auth_asym_id (exclusive with --replace)",
+    )
+    parser.add_argument(
+        "--replace",
+        help="name of a data item to replace values, e.g., auth_asym_id (exclusive with --copy-from and --copy-to)",
+    )
+    parser.add_argument(
+        "--values",
+        help="values to replace with, e.g., ABCDEFGHIJKLMNOPQRSTUVWXYZ (exclusive with --copy-from and --copy-to)",
+    )
+    args = parser.parse_args()
+    if args.copy_from and args.copy_to:
+        output = copy_from_to(args.input, args.category, args.copy_from, args.copy_to)
+    elif args.replace and args.values:
+        output = replace_value(args.input, args.category, args.replace, args.values)
+    else:
+        parser.print_help()
+        return
+    with open(args.output, "w") as f:
+        f.write(output)
+if __name__ == "__main__":
+    main()

rnapolis-0.4.12/tests/test_molecule_filter.py ADDED Viewed

@@ -0,0 +1,34 @@
+import tempfile
+from rnapolis.molecule_filter import filter_by_chains, filter_by_poly_types
+from rnapolis.parser import parse_cif
+def test_filter_by_poly_types():
+    with open("tests/1a9n.cif") as f:
+        content = f.read()
+    filtered = filter_by_poly_types(content, ["polyribonucleotide"])
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(filtered)
+        f.seek(0)
+        atoms, _, _, _ = parse_cif(f)
+    chains = set([atom.label.chain for atom in atoms if atom.label])
+    assert chains == {"A", "B"}
+def test_filter_by_chains():
+    with open("tests/1a9n.cif") as f:
+        content = f.read()
+    filtered = filter_by_chains(content, ["A", "C"])
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(filtered)
+        f.seek(0)
+        atoms, _, _, _ = parse_cif(f)
+    chains = set([atom.label.chain for atom in atoms if atom.label])
+    assert chains >= {"A", "C"}

rnapolis-0.4.12/tests/test_transformer.py ADDED Viewed

@@ -0,0 +1,63 @@
+import tempfile
+from rnapolis.parser import parse_cif
+from rnapolis.transformer import copy_from_to, replace_value
+def test_replace_value():
+    with open("tests/4gqj-assembly1.cif") as f:
+        content = f.read()
+    with tempfile.NamedTemporaryFile(mode="wt") as f:
+        f.write(content)
+        f.seek(0)
+        org_atoms, _, _, _ = parse_cif(f)
+    org_label_asym_id = set([atom.label.chain for atom in org_atoms if atom.label])
+    org_auth_asym_id = set([atom.auth.chain for atom in org_atoms if atom.auth])
+    assert org_label_asym_id == set(["A", "B", "A-2", "B-2"])
+    assert org_auth_asym_id == set(["A", "B", "A-2", "B-2"])
+    replaced_content, mapping = replace_value(
+        content, "atom_site", "auth_asym_id", "ABCD"
+    )
+    assert mapping == {"A": "A", "B": "B", "A-2": "C", "B-2": "D"}
+    with tempfile.NamedTemporaryFile(mode="rt+") as f:
+        f.write(replaced_content)
+        f.seek(0)
+        rep_atoms, _, _, _ = parse_cif(f)
+    rep_label_asym_id = set([atom.label.chain for atom in rep_atoms if atom.label])
+    rep_auth_asym_id = set([atom.auth.chain for atom in rep_atoms if atom.auth])
+    assert rep_label_asym_id == set(["A", "B", "A-2", "B-2"])
+    assert rep_auth_asym_id == set(["A", "B", "C", "D"])
+def test_copy_from_to():
+    with open("tests/5it9.cif") as f:
+        content = f.read()
+    with tempfile.NamedTemporaryFile(mode="wt") as f:
+        f.write(content)
+        f.seek(0)
+        org_atoms, _, _, _ = parse_cif(f)
+    org_label_asym_id = set([atom.label.chain for atom in org_atoms if atom.label])
+    org_auth_asym_id = set([atom.auth.chain for atom in org_atoms if atom.auth])
+    assert org_label_asym_id == set(["HA", "IA"])
+    assert org_auth_asym_id == set(["2", "i"])
+    replaced_content = copy_from_to(
+        content, "atom_site", "label_asym_id", "auth_asym_id"
+    )
+    with tempfile.NamedTemporaryFile(mode="rt+") as f:
+        f.write(replaced_content)
+        f.seek(0)
+        rep_atoms, _, _, _ = parse_cif(f)
+    rep_label_asym_id = set([atom.label.chain for atom in rep_atoms if atom.label])
+    rep_auth_asym_id = set([atom.auth.chain for atom in rep_atoms if atom.auth])
+    assert rep_label_asym_id == set(["HA", "IA"])
+    assert rep_auth_asym_id == set(["HA", "IA"])

rnapolis-0.4.10/src/rnapolis/transformer.py DELETED Viewed

@@ -1,65 +0,0 @@
-#! /usr/bin/env python
-import argparse
-import sys
-from mmcif.io.IoAdapterPy import IoAdapterPy
-from mmcif.io.PdbxReader import DataCategory
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("input", help="path to input mmCIF file")
-    parser.add_argument("output", help="path to output mmCIF file")
-    parser.add_argument(
-        "--category", help="name of the category to work on, e.g., atom_site"
-    )
-    parser.add_argument(
-        "--copy-from", help="name of a data item to copy from, e.g., label_asym_id"
-    )
-    parser.add_argument(
-        "--copy-to", help="name of a data item to copy to, e.g., auth_asym_id"
-    )
-    args = parser.parse_args()
-    adapter = IoAdapterPy()
-    data = adapter.readFile(args.input)
-    if len(data) == 0:
-        print("Empty mmCIF file", file=sys.stderr)
-        sys.exit(1)
-    if args.category not in data[0].getObjNameList():
-        print(f"Failed to find {args.category} in the mmCIF file", file=sys.stderr)
-        sys.exit(1)
-    category = data[0].getObj(args.category)
-    attributes = category.getAttributeList()
-    if args.copy_from not in attributes:
-        print(
-            f"Failed to find data item {args.copy_from} in {args.category}",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    transformed = []
-    if args.copy_to not in attributes:
-        attributes.append(args.copy_to)
-    for row in category.getRowList():
-        i = attributes.index(args.copy_from)
-        j = attributes.index(args.copy_to)
-        if j >= len(row):
-            row.append(row[i])
-        else:
-            row[j] = row[i]
-        transformed.append(row)
-    data[0].replace(DataCategory(args.category, attributes, transformed))
-    adapter.writeFile(args.output, data)
-if __name__ == "__main__":
-    main()