PyPI - RNApolis - Versions diffs - 0.3.18__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

RNApolis 0.3.18__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

{RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: RNApolis
-Version: 0.3.18
+Version: 0.4.1
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/RECORD RENAMED Viewed

@@ -4,14 +4,14 @@ rnapolis/common.py,sha256=PUYF01P2vevhyImhZjGYE0jJlsxWHX6GQmsxI4W7S-E,30255
 rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
 rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
 rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
-rnapolis/parser.py,sha256=rQuzaRqsNTdHCS8_dKW5uT_nSi0xLnixF5xfy8puo_s,12665
+rnapolis/parser.py,sha256=wCA9rXqt51iLECgeBqOShFpuT8JwanNkHYD5uXYvLzU,13988
 rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
-rnapolis/tertiary.py,sha256=wy8n7gfLHj_lIAydGgIwn_RMvBYiJzkSxXU_eff3ym0,19657
+rnapolis/tertiary.py,sha256=qk1te8GPDuvQsnm4rTiw96VDYyNoO5x4IPf98zDzxPw,20824
 rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-RNApolis-0.3.18.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
-RNApolis-0.3.18.dist-info/METADATA,sha256=OkVdmxEbvCPPkUD1R49oZv5j89ng8-tds1yQeZYzORk,54323
-RNApolis-0.3.18.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-RNApolis-0.3.18.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
-RNApolis-0.3.18.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
-RNApolis-0.3.18.dist-info/RECORD,,
+RNApolis-0.4.1.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+RNApolis-0.4.1.dist-info/METADATA,sha256=EayMQbE4Y5raff-7pFMmeh4EB81JaLEEMrbvEA4mbAk,54322
+RNApolis-0.4.1.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+RNApolis-0.4.1.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
+RNApolis-0.4.1.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+RNApolis-0.4.1.dist-info/RECORD,,

rnapolis/parser.py CHANGED Viewed

@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
 def read_3d_structure(
     cif_or_pdb: IO[str], model: Optional[int] = None, nucleic_acid_only: bool = False
 ) -> Structure3D:
-    atoms, modified, sequence = (
+    atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity = (
         parse_cif(cif_or_pdb) if is_cif(cif_or_pdb) else parse_pdb(cif_or_pdb)
     )
     available_models = {atom.model: None for atom in atoms}
@@ -24,7 +24,13 @@ def read_3d_structure(
         atoms = atoms_by_model[model]
     else:
         atoms = atoms_by_model[list(available_models.keys())[0]]
-    return group_atoms(atoms, modified, sequence, nucleic_acid_only)
+    return group_atoms(
+        atoms,
+        modified,
+        sequence_by_entity,
+        is_nucleic_acid_by_entity,
+        nucleic_acid_only,
+    )
 def is_cif(cif_or_pdb: IO[str]) -> bool:
@@ -40,7 +46,8 @@ def parse_cif(
 ) -> Tuple[
     List[Atom],
     Dict[Union[ResidueLabel, ResidueAuth], str],
-    Dict[Tuple[str, int], str],
+    Dict[str, str],
+    Dict[str, bool],
 ]:
     cif.seek(0)
@@ -48,7 +55,8 @@ def parse_cif(
     data = io_adapter.readFile(cif.name)
     atoms: List[Atom] = []
     modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
-    sequence = {}
+    sequence_by_entity = {}
+    is_nucleic_acid_by_entity = {}
     if data:
         atom_site = data[0].getObj("atom_site")
@@ -59,6 +67,7 @@ def parse_cif(
             for row in atom_site.getRowList():
                 row_dict = dict(zip(atom_site.getAttributeList(), row))
+                label_entity_id = row_dict.get("label_entity_id", None)
                 label_chain_name = row_dict.get("label_asym_id", None)
                 label_residue_number = try_parse_int(row_dict.get("label_seq_id", None))
                 label_residue_name = row_dict.get("label_comp_id", None)
@@ -127,7 +136,19 @@ def parse_cif(
                     else None
                 )
-                atoms.append(Atom(label, auth, model, atom_name, x, y, z, occupancy))
+                atoms.append(
+                    Atom(
+                        label_entity_id,
+                        label,
+                        auth,
+                        model,
+                        atom_name,
+                        x,
+                        y,
+                        z,
+                        occupancy,
+                    )
+                )
         if mod_residue:
             for row in mod_residue.getRowList():
@@ -178,17 +199,24 @@ def parse_cif(
             for row in entity_poly.getRowList():
                 row_dict = dict(zip(entity_poly.getAttributeList(), row))
-                pdbx_strand_id = row_dict.get("pdbx_strand_id", None)
+                entity_id = row_dict.get("entity_id", None)
+                type_ = row_dict.get("type", None)
                 pdbx_seq_one_letter_code_can = row_dict.get(
                     "pdbx_seq_one_letter_code_can", None
                 )
-                if pdbx_strand_id and pdbx_seq_one_letter_code_can:
-                    for strand in pdbx_strand_id.split(","):
-                        for i, letter in enumerate(pdbx_seq_one_letter_code_can):
-                            sequence[(strand, i + 1)] = letter
+                if entity_id and type_:
+                    is_nucleic_acid_by_entity[entity_id] = type_ in (
+                        "peptide nucleic acid",
+                        "polydeoxyribonucleotide",
+                        "polydeoxyribonucleotide/polyribonucleotide hybrid",
+                        "polyribonucleotide",
+                    )
+                if entity_id and pdbx_seq_one_letter_code_can:
+                    sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
-    return atoms, modified, sequence
+    return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
 def parse_pdb(
@@ -196,7 +224,8 @@ def parse_pdb(
 ) -> Tuple[
     List[Atom],
     Dict[Union[ResidueLabel, ResidueAuth], str],
-    Dict[Tuple[str, int], str],
+    Dict[str, str],
+    Dict[str, bool],
 ]:
     pdb.seek(0)
     atoms: List[Atom] = []
@@ -222,7 +251,7 @@ def parse_pdb(
             auth = ResidueAuth(
                 chain_identifier, residue_number, insertion_code, residue_name
             )
-            atoms.append(Atom(None, auth, model, atom_name, x, y, z, occupancy))
+            atoms.append(Atom(None, None, auth, model, atom_name, x, y, z, occupancy))
         elif line.startswith("MODRES"):
             original_name = line[12:15]
             chain_identifier = line[16]
@@ -234,13 +263,14 @@ def parse_pdb(
             )
             modified[auth] = standard_residue_name
-    return atoms, modified, {}
+    return atoms, modified, {}, {}
 def group_atoms(
     atoms: List[Atom],
     modified: Dict[Union[ResidueLabel, ResidueAuth], str],
-    sequence: Dict[Tuple[str, int], str],
+    sequence_by_entity: Dict[str, str],
+    is_nucleic_acid_by_entity: Dict[str, bool],
     nucleic_acid_only: bool,
 ) -> Structure3D:
     if not atoms:
@@ -258,28 +288,45 @@ def group_atoms(
             label = key_previous[0]
             auth = key_previous[1]
             model = key_previous[2]
+            entity_id = residue_atoms[-1].entity_id
             name = get_residue_name(auth, label, modified)
-            one_letter_name = get_one_letter_name(label, sequence, name)
-            if one_letter_name not in "ACGUT":
+            one_letter_name = get_one_letter_name(
+                entity_id, label, sequence_by_entity, name
+            )
+            if one_letter_name not in "ACGUTN":
                 one_letter_name = detect_one_letter_name(residue_atoms)
-            residue = Residue3D(
-                label, auth, model, one_letter_name, tuple(residue_atoms)
+            residues.append(
+                Residue3D(label, auth, model, one_letter_name, tuple(residue_atoms))
             )
-            if not nucleic_acid_only or (nucleic_acid_only and residue.is_nucleotide):
-                residues.append(residue)
             key_previous = key
             residue_atoms = [atom]
     label = key_previous[0]
     auth = key_previous[1]
     model = key_previous[2]
+    entity_id = residue_atoms[-1].entity_id
     name = get_residue_name(auth, label, modified)
-    one_letter_name = get_one_letter_name(label, sequence, name)
-    if one_letter_name not in "ACGUT":
+    one_letter_name = get_one_letter_name(entity_id, label, sequence_by_entity, name)
+    if one_letter_name not in "ACGUTN":
         one_letter_name = detect_one_letter_name(residue_atoms)
-    residue = Residue3D(label, auth, model, one_letter_name, tuple(residue_atoms))
-    if not nucleic_acid_only or (nucleic_acid_only and residue.is_nucleotide):
-        residues.append(residue)
+    residues.append(
+        Residue3D(label, auth, model, one_letter_name, tuple(residue_atoms))
+    )
+    if nucleic_acid_only:
+        if is_nucleic_acid_by_entity:
+            residues = [
+                residue
+                for residue in residues
+                if is_nucleic_acid_by_entity[residue.atoms[0].entity_id]
+            ]
+        else:
+            residues = [residue for residue in residues if residue.is_nucleotide]
     return Structure3D(residues)
@@ -304,13 +351,14 @@ def get_residue_name(
 def get_one_letter_name(
-    label: Optional[ResidueLabel], sequence: Dict[Tuple[str, int], str], name: str
+    entity_id: Optional[str],
+    label: Optional[ResidueLabel],
+    sequence_by_entity: Dict[str, str],
+    name: str,
 ) -> str:
     # try getting the value from _entity_poly first
-    if label is not None:
-        key = (label.chain, label.number)
-        if key in sequence:
-            return sequence[key]
+    if entity_id is not None and label is not None and entity_id in sequence_by_entity:
+        return sequence_by_entity[entity_id][label.number - 1]
     # RNA
     if len(name) == 1:
         return name
@@ -334,11 +382,13 @@ def detect_one_letter_name(atoms: List[Atom]) -> str:
         ) / len(atom_names_expected)
         score[candidate] = count
     items = sorted(score.items(), key=lambda kv: kv[1], reverse=True)
+    if items[0][1] == 0:
+        return "?"
     return items[0][0]
 def try_parse_int(s: str) -> Optional[int]:
     try:
         return int(s)
-    except:
+    except ValueError:
         return None

rnapolis/tertiary.py CHANGED Viewed

@@ -96,6 +96,7 @@ AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT = 1.6
 @dataclass(frozen=True, order=True)
 class Atom:
+    entity_id: Optional[str]
     label: Optional[ResidueLabel]
     auth: Optional[ResidueAuth]
     model: int
@@ -128,6 +129,29 @@ class Residue3D(Residue):
         "C": set(["N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"]),
         "U": set(["N1", "C2", "O2", "N3", "C4", "O4", "C5", "C6"]),
     }
+    # Heavy atoms in nucleotide
+    nucleotide_heavy_atoms = (
+        set(
+            [
+                "P",
+                "OP1",
+                "OP2",
+                "O5'",
+                "C5'",
+                "C4'",
+                "O4'",
+                "C3'",
+                "O3'",
+                "C2'",
+                "O2'",
+                "C1'",
+            ]
+        )
+        .union(nucleobase_heavy_atoms["A"])
+        .union(nucleobase_heavy_atoms["G"])
+        .union(nucleobase_heavy_atoms["C"])
+        .union(nucleobase_heavy_atoms["U"])
+    )
     def __lt__(self, other):
         return (self.model, self.chain, self.number, self.icode or " ") < (
@@ -176,8 +200,8 @@ class Residue3D(Residue):
     @cached_property
     def is_nucleotide(self) -> bool:
-        return len(self.atoms) > 1 and any(
-            [atom for atom in self.atoms if atom.name == "C1'"]
+        return self.nucleotide_heavy_atoms.intersection(
+            set([atom.name for atom in self.atoms])
         )
     @cached_property
@@ -268,7 +292,7 @@ class Residue3D(Residue):
         logging.error(
             f"Failed to determine the outermost atom for nucleotide {self}, so an arbitrary atom will be used"
         )
-        yield Atom(self.label, self.auth, self.model, "UNK", 0.0, 0.0, 0.0, None)
+        yield Atom(None, self.label, self.auth, self.model, "UNK", 0.0, 0.0, 0.0, None)
     def __inner_generator(self):
         # try to find expected atom name
@@ -296,7 +320,7 @@ class Residue3D(Residue):
         logging.error(
             f"Failed to determine the innermost atom for nucleotide {self}, so an arbitrary atom will be used"
         )
-        yield Atom(self.label, self.auth, self.model, "UNK", 0.0, 0.0, 0.0, None)
+        yield Atom(None, self.label, self.auth, self.model, "UNK", 0.0, 0.0, 0.0, None)
 @dataclass(frozen=True, order=True)
@@ -524,7 +548,20 @@ class Mapping2D3D:
         result: Dict[int, List] = {}
         residue_map: Dict[Residue3D, int] = {}
         i = 1
-        for residue in self.structure3d.residues:
+        for j, residue in enumerate(self.structure3d.residues):
+            if self.find_gaps and j > 0:
+                previous = self.structure3d.residues[j - 1]
+                if (
+                    previous.is_nucleotide
+                    and residue.is_nucleotide
+                    and previous.label
+                    and residue.label
+                    and previous.label.chain == residue.label.chain
+                ):
+                    for k in range(residue.label.number - previous.label.number - 1):
+                        result[i] = [i, "?", 0]
+                        i += 1
             if residue.is_nucleotide:
                 result[i] = [i, residue.one_letter_name, 0]
                 residue_map[residue] = i

{RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

RNApolis 0.3.18__py3-none-any.whl → 0.4.1__py3-none-any.whl

RNApolis 0.3.18__py3-none-any.whl → 0.4.1__py3-none-any.whl