PyPI - RNApolis - Versions diffs - 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl - Mend

RNApolis 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of RNApolis might be problematic. Click here for more details.

Files changed (9) hide show

{RNApolis-0.4.10.dist-info → RNApolis-0.4.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.4.10
+Version: 0.4.12
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{RNApolis-0.4.10.dist-info → RNApolis-0.4.12.dist-info}/RECORD RENAMED Viewed

@@ -2,16 +2,16 @@ rnapolis/annotator.py,sha256=_hsSX2VHFvIQ47l_EA7lwGFXLiVLbhFPEsOQzBKbjRk,22100
 rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
 rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
 rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
-rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
+rnapolis/molecule_filter.py,sha256=F_xkAe7q2NZAaDpRaeikv-twUvbNflWdlLte7oFn2Ms,9130
 rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
-rnapolis/parser.py,sha256=2pQYy0sh8TCpeluMmmSJ7C5dudK_bsfstTWCdpwwpNU,15193
+rnapolis/parser.py,sha256=lHI6LyFbEEPdHOzbged1-Ov0tl6MpSungIPacip0Py0,15838
 rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
 rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
-rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
+rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-RNApolis-0.4.10.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
-RNApolis-0.4.10.dist-info/METADATA,sha256=AiTwfWTRaaJ_Zd_E1UIYMu54Hi0vu9WN8dYA67x3SLk,54323
-RNApolis-0.4.10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-RNApolis-0.4.10.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
-RNApolis-0.4.10.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
-RNApolis-0.4.10.dist-info/RECORD,,
+RNApolis-0.4.12.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+RNApolis-0.4.12.dist-info/METADATA,sha256=fPrlpVrM83UYwPYwaeymBP7SeVLMv7XftCZOl7hUnes,54323
+RNApolis-0.4.12.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+RNApolis-0.4.12.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
+RNApolis-0.4.12.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+RNApolis-0.4.12.dist-info/RECORD,,

rnapolis/molecule_filter.py CHANGED Viewed

@@ -1,10 +1,11 @@
 #! /usr/bin/env python
 import argparse
 import tempfile
-from typing import List, Set, Tuple
+from typing import Iterable, List, Set, Tuple
 from mmcif.io.IoAdapterPy import IoAdapterPy
 from mmcif.io.PdbxReader import DataCategory, DataContainer
 from rnapolis.util import handle_input_file
 # Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
@@ -153,38 +154,63 @@ def select_category_by_id(
     return attributes, rows
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--type",
-        help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
-        action="append",
-        default=["polyribonucleotide"],
-        choices=ENTITY_POLY_TYPES,
+def filter_by_poly_types(
+    file_content: str, entity_poly_types: Iterable[str] = ["polyribonucleotide"]
+) -> str:
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    entity_ids = select_ids(
+        data, "entity_poly", "type", "entity_id", set(entity_poly_types)
     )
-    parser.add_argument(
-        "--chain",
-        help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
-        action="append",
-        default=[],
+    asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
+    auth_asym_ids = select_ids(
+        data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
     )
-    parser.add_argument("path", help="path to a PDBx/mmCIF file")
-    args = parser.parse_args()
-    file = handle_input_file(args.path)
-    adapter = IoAdapterPy()
-    data = adapter.readFile(file.name)
     output = DataContainer("rnapolis")
-    if args.chain:
-        entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(args.chain))
-        asym_ids = set(args.chain)
-    else:
-        entity_ids = select_ids(
-            data, "entity_poly", "type", "entity_id", set(args.type)
-        )
-        asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
+    for table, ids in (
+        (CATEGORIES_WITH_ENTITY_ID, entity_ids),
+        (CATEGORIES_WITH_ASYM_ID, asym_ids),
+        (CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
+    ):
+        for category, field_name in table:
+            attributes, rows = select_category_by_id(data, category, field_name, ids)
+            if attributes and rows:
+                obj = DataCategory(category, attributes, rows)
+                output.append(obj)
+    with tempfile.NamedTemporaryFile("rt+") as tmp:
+        adapter.writeFile(tmp.name, [output])
+        tmp.seek(0)
+        return tmp.read()
+def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
+    """
+    Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
+    Warning! The new file might contain more chains than provided in the `chains` argument.
+    This is because the function filters by entity, so if you ask for chain "A",
+    which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
+    """
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile("rt+") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    output = DataContainer("rnapolis")
+    entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
+    asym_ids = set(chains)
     auth_asym_ids = select_ids(
         data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
     )
@@ -201,9 +227,38 @@ def main():
                 obj = DataCategory(category, attributes, rows)
                 output.append(obj)
-    with tempfile.NamedTemporaryFile() as tmp:
+    with tempfile.NamedTemporaryFile("rt+") as tmp:
         adapter.writeFile(tmp.name, [output])
-        print(tmp.read().decode())
+        tmp.seek(0)
+        return tmp.read()
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--type",
+        help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
+        action="append",
+        default=["polyribonucleotide"],
+        choices=ENTITY_POLY_TYPES,
+    )
+    parser.add_argument(
+        "--chain",
+        help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
+        action="append",
+        default=[],
+    )
+    parser.add_argument("path", help="path to a PDBx/mmCIF file")
+    args = parser.parse_args()
+    file = handle_input_file(args.path)
+    if args.chain:
+        print(filter_by_chains(file.read(), args.chain))
+    elif args.type:
+        print(filter_by_poly_types(file.read(), args.type))
+    else:
+        parser.print_help()
 if __name__ == "__main__":

rnapolis/parser.py CHANGED Viewed

@@ -65,6 +65,7 @@ def parse_cif(
         atom_site = data[0].getObj("atom_site")
         mod_residue = data[0].getObj("pdbx_struct_mod_residue")
         entity_poly = data[0].getObj("entity_poly")
+        entity = data[0].getObj("entity")
         if atom_site:
             for row in atom_site.getRowList():
@@ -219,6 +220,23 @@ def parse_cif(
                 if entity_id and pdbx_seq_one_letter_code_can:
                     sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
+        if entity:
+            for row in entity.getRowList():
+                row_dict = dict(zip(entity.getAttributeList(), row))
+                entity_id = row_dict.get("id", None)
+                type_ = row_dict.get("type", None)
+                if entity_id:
+                    sequence_by_entity[entity_id] = sequence_by_entity.get(
+                        entity_id, ""
+                    )
+                    if type_:
+                        is_nucleic_acid_by_entity[entity_id] = (
+                            is_nucleic_acid_by_entity.get(entity_id, type_)
+                        )
     atoms = filter_clashing_atoms(atoms_to_process)
     return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity

rnapolis/transformer.py CHANGED Viewed

@@ -1,64 +1,133 @@
 #! /usr/bin/env python
 import argparse
-import sys
+import string
+import tempfile
+from typing import Dict, Tuple
 from mmcif.io.IoAdapterPy import IoAdapterPy
 from mmcif.io.PdbxReader import DataCategory
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("input", help="path to input mmCIF file")
-    parser.add_argument("output", help="path to output mmCIF file")
-    parser.add_argument(
-        "--category", help="name of the category to work on, e.g., atom_site"
-    )
-    parser.add_argument(
-        "--copy-from", help="name of a data item to copy from, e.g., label_asym_id"
-    )
-    parser.add_argument(
-        "--copy-to", help="name of a data item to copy to, e.g., auth_asym_id"
-    )
-    args = parser.parse_args()
+def copy_from_to(
+    file_content: str,
+    category: str = "atom_site",
+    copy_from: str = "label_asym_id",
+    copy_to: str = "auth_asym_id",
+) -> str:
     adapter = IoAdapterPy()
-    data = adapter.readFile(args.input)
-    if len(data) == 0:
-        print("Empty mmCIF file", file=sys.stderr)
-        sys.exit(1)
+    with tempfile.NamedTemporaryFile(mode="wt") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
-    if args.category not in data[0].getObjNameList():
-        print(f"Failed to find {args.category} in the mmCIF file", file=sys.stderr)
-        sys.exit(1)
+    if len(data) == 0 or category not in data[0].getObjNameList():
+        return file_content
-    category = data[0].getObj(args.category)
-    attributes = category.getAttributeList()
+    category_obj = data[0].getObj(category)
+    attributes = category_obj.getAttributeList()
-    if args.copy_from not in attributes:
-        print(
-            f"Failed to find data item {args.copy_from} in {args.category}",
-            file=sys.stderr,
-        )
-        sys.exit(1)
+    if copy_from not in attributes:
+        return file_content
     transformed = []
-    if args.copy_to not in attributes:
-        attributes.append(args.copy_to)
+    if copy_to not in attributes:
+        attributes.append(copy_to)
-    for row in category.getRowList():
-        i = attributes.index(args.copy_from)
-        j = attributes.index(args.copy_to)
+    for row in category_obj.getRowList():
+        i = attributes.index(copy_from)
+        j = attributes.index(copy_to)
         if j >= len(row):
             row.append(row[i])
         else:
             row[j] = row[i]
         transformed.append(row)
-    data[0].replace(DataCategory(args.category, attributes, transformed))
+    data[0].replace(DataCategory(category_obj, attributes, transformed))
+    with tempfile.NamedTemporaryFile(mode="rt+") as f:
+        adapter.writeFile(f.name, data)
+        f.seek(0)
+        return f.read()
+def replace_value(
+    file_content: str,
+    category: str = "atom_site",
+    column: str = "auth_asym_id",
+    values: str = "".join([c for c in string.printable if c not in string.whitespace]),
+) -> Tuple[str, Dict]:
+    adapter = IoAdapterPy()
+    with tempfile.NamedTemporaryFile(mode="wt") as f:
+        f.write(file_content)
+        f.seek(0)
+        data = adapter.readFile(f.name)
+    if len(data) == 0 or category not in data[0].getObjNameList():
+        return file_content, {}
+    category_obj = data[0].getObj(category)
+    attributes = category_obj.getAttributeList()
+    if column not in attributes:
+        return file_content, {}
+    transformed = []
+    mapping = {}
+    for row in category_obj.getRowList():
+        i = attributes.index(column)
+        if row[i] not in mapping:
+            mapping[row[i]] = values[len(mapping)]
+        row[i] = mapping[row[i]]
+        transformed.append(row)
+    data[0].replace(DataCategory(category_obj, attributes, transformed))
+    with tempfile.NamedTemporaryFile(mode="rt+") as f:
+        adapter.writeFile(f.name, data)
+        f.seek(0)
+        return f.read(), mapping
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input", help="path to input mmCIF file")
+    parser.add_argument("output", help="path to output mmCIF file")
+    parser.add_argument(
+        "--category", help="name of the category to work on, e.g., atom_site"
+    )
+    parser.add_argument(
+        "--copy-from",
+        help="name of a data item to copy from, e.g., label_asym_id (exclusive with --replace)",
+    )
+    parser.add_argument(
+        "--copy-to",
+        help="name of a data item to copy to, e.g., auth_asym_id (exclusive with --replace)",
+    )
+    parser.add_argument(
+        "--replace",
+        help="name of a data item to replace values, e.g., auth_asym_id (exclusive with --copy-from and --copy-to)",
+    )
+    parser.add_argument(
+        "--values",
+        help="values to replace with, e.g., ABCDEFGHIJKLMNOPQRSTUVWXYZ (exclusive with --copy-from and --copy-to)",
+    )
+    args = parser.parse_args()
+    if args.copy_from and args.copy_to:
+        output = copy_from_to(args.input, args.category, args.copy_from, args.copy_to)
+    elif args.replace and args.values:
+        output = replace_value(args.input, args.category, args.replace, args.values)
+    else:
+        parser.print_help()
+        return
-    adapter.writeFile(args.output, data)
+    with open(args.output, "w") as f:
+        f.write(output)
 if __name__ == "__main__":

{RNApolis-0.4.10.dist-info → RNApolis-0.4.12.dist-info}/LICENSE RENAMED Viewed

File without changes

{RNApolis-0.4.10.dist-info → RNApolis-0.4.12.dist-info}/WHEEL RENAMED Viewed

File without changes

{RNApolis-0.4.10.dist-info → RNApolis-0.4.12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{RNApolis-0.4.10.dist-info → RNApolis-0.4.12.dist-info}/top_level.txt RENAMED Viewed

File without changes

RNApolis 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl

Potentially problematic release.

RNApolis 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl