RNApolis 0.4.17__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rnapolis/unifier.py ADDED
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import sys
5
+ from collections import Counter
6
+
7
+ import pandas as pd
8
+
9
+ from rnapolis.parser import is_cif
10
+ from rnapolis.parser_v2 import parse_cif_atoms, parse_pdb_atoms, write_cif, write_pdb
11
+ from rnapolis.tertiary_v2 import Structure
12
+
13
+
14
+ def load_components():
15
+ result = {}
16
+ for residue in "ACGU":
17
+ component = os.path.join(
18
+ os.path.abspath(os.path.dirname(__file__)), f"component_{residue}.csv"
19
+ )
20
+ result[residue] = pd.read_csv(component)
21
+ return result
22
+
23
+
24
def _build_atom_tables(components):
    """Precompute, once per nucleotide, the lookup tables used for every residue.

    Args:
        components: dict mapping a residue name to a DataFrame that has
            ``atom_id`` and ``alt_atom_id`` columns.

    Returns:
        A dict mapping each residue name to a tuple
        ``(alt_to_standard, heavy_atoms, atom_rank)`` where ``alt_to_standard``
        maps alternative atom names to standard ones, ``heavy_atoms`` is the
        Series of standard atom names not starting with "H", and ``atom_rank``
        gives the position of every such name in the component table.
    """
    tables = {}
    for residue_name, component in components.items():
        alt_to_standard = dict(zip(component["alt_atom_id"], component["atom_id"]))
        heavy_atoms = component["atom_id"]
        heavy_atoms = heavy_atoms[~heavy_atoms.str.startswith("H")]
        atom_rank = {name: idx for idx, name in enumerate(heavy_atoms.tolist())}
        tables[residue_name] = (alt_to_standard, heavy_atoms, atom_rank)
    return tables


def _standardize_residues(atoms, tables):
    """Return the residues of one structure with normalized atom tables.

    Residues whose name has no entry in ``tables`` are skipped. For the
    remaining ones, alternative atom names are replaced by standard names,
    atoms outside the standard non-hydrogen set are dropped, and atoms are
    sorted in the component-table order.
    """
    residues = []

    for residue in Structure(atoms).residues:
        # Exact-key lookup: a substring test against "ACGU" would also accept
        # names such as "" or "AC" and then fail on the table lookup.
        table = tables.get(residue.residue_name)
        if table is None:
            continue

        alt_to_standard, heavy_atoms, atom_rank = table
        column = "name" if residue.format == "PDB" else "auth_atom_id"

        # Work on a copy so the assignment below never writes into a slice of
        # the frame the residue was taken from.
        atom_table = residue.atoms.copy()
        # Replace alternative name with standard name
        atom_table[column] = atom_table[column].replace(alt_to_standard)
        # Leave only standard, non-hydrogen atoms
        atom_table = atom_table[atom_table[column].isin(heavy_atoms)]
        # Reorder atoms
        residue.atoms = atom_table.sort_values(
            by=[column], key=lambda col: col.map(atom_rank)
        )
        residues.append(residue)

    return residues


def _find_mismatched_residues(structures):
    """Validate all structures against the first one and collect bad positions.

    Exits the process with status 1 when a structure differs from the
    reference (the first structure) in residue count or in any residue name.

    Returns:
        The set of residue indices at which some structure has a different
        number of atoms than the reference.
    """
    ref_path, ref_residues = structures[0]
    mismatched = set()

    for path, residues in structures:
        # Validity check 1: residue count must be equal
        if len(residues) != len(ref_residues):
            print(
                f"Number of residues in {path} does not match {ref_path}, cannot continue"
            )
            sys.exit(1)

        # Validity check 2: residue names must be equal
        for residue, ref_residue in zip(residues, ref_residues):
            if residue.residue_name != ref_residue.residue_name:
                print(
                    f"Residue {residue!s} in {path} does not match {ref_residue!s} in {ref_path}, cannot continue"
                )
                sys.exit(1)

        # Find residues with different number of atoms
        for i, (residue, ref_residue) in enumerate(zip(residues, ref_residues)):
            if len(residue.atoms) != len(ref_residue.atoms):
                print(
                    f"Number of atoms in {residue!s} in {path} does not match {ref_residue!s} in {ref_path}, will unify this"
                )
                mismatched.add(i)

    return mismatched


def _unify_identifiers(structures):
    """Give every residue position the same identifiers in all structures.

    For each position, the most common ``(chain_id, residue_number,
    insertion_code)`` triple across structures is assigned to all of them
    whenever the structures do not already agree.
    """
    counters = [Counter() for _ in structures[0][1]]
    for _, residues in structures:
        for counter, residue in zip(counters, residues):
            counter.update(
                [(residue.chain_id, residue.residue_number, residue.insertion_code)]
            )

    for i, counter in enumerate(counters):
        (chain_id, residue_number, insertion_code), count = counter.most_common(1)[0]
        if count == len(structures):
            continue

        print(
            f"Residue {i + 1} has different identifiers in different structures, will unify this"
        )
        for _, residues in structures:
            residue = residues[i]
            residue.chain_id = chain_id
            residue.residue_number = residue_number
            residue.insertion_code = insertion_code


def _write_structure(path, residues, output_dir, requested_format):
    """Write one unified structure into ``output_dir``.

    The output file takes the base name of ``path`` with a ``.pdb`` or ``.cif``
    extension. With ``requested_format == "keep"`` the format is read from the
    ``format`` entry of the first residue's atom-table ``attrs``.
    """
    # NOTE(review): inputs sharing a base name (e.g. a/x.pdb and b/x.pdb) map
    # to the same output file and overwrite each other.
    base, _ = os.path.splitext(os.path.basename(path))

    if requested_format == "keep":
        output_format = residues[0].atoms.attrs["format"]
    else:
        output_format = requested_format

    is_pdb = output_format == "PDB"
    ext = ".pdb" if is_pdb else ".cif"
    df = pd.concat([residue.atoms for residue in residues])

    with open(os.path.join(output_dir, f"{base}{ext}"), "w") as f:
        if is_pdb:
            write_pdb(df, f)
        else:
            write_cif(df, f)


def main():
    """Main function to run the unifier tool.

    Reads the given PDB/mmCIF files, keeps only A/C/G/U residues with
    standard non-hydrogen atoms in a fixed order, drops residue positions
    whose atom counts differ between files, unifies residue identifiers, and
    writes one output file per input into the ``--output`` directory.

    Exits with status 1 when the inputs cannot be unified (different residue
    counts or names) or when no residues are left to write.
    """
    parser = argparse.ArgumentParser(
        description="Unify content of a set of PDB or mmCIF files."
    )
    parser.add_argument("--output", "-o", help="Output directory", required=True)
    parser.add_argument(
        "--format",
        "-f",
        help="Output format (possible values: PDB, mmCIF, keep. Default: keep)",
        default="keep",
    )
    parser.add_argument("files", nargs="+", help="PDB or mmCIF files to unify")
    args = parser.parse_args()

    tables = _build_atom_tables(load_components())
    structures = []

    for path in args.files:
        with open(path) as f:
            # presumably is_cif() leaves the handle readable from the start
            # for the parser that follows; verify against rnapolis.parser
            if is_cif(f):
                atoms = parse_cif_atoms(f)
            else:
                atoms = parse_pdb_atoms(f)

        structures.append((path, _standardize_residues(atoms, tables)))

    # Remove residues with different number of atoms; delete from the end so
    # the remaining indices stay valid.
    residues_to_remove = _find_mismatched_residues(structures)
    for _, residues in structures:
        for i in sorted(residues_to_remove, reverse=True):
            del residues[i]

    # All structures have equal residue counts at this point, so checking the
    # first one is enough. Without this guard the writer would fail on an
    # empty residue list.
    if not structures[0][1]:
        print("No residues left after unification, cannot continue")
        sys.exit(1)

    # If any residue has different identifiers, use the most common one in all structures
    _unify_identifiers(structures)

    # Write output
    os.makedirs(args.output, exist_ok=True)

    for path, residues in structures:
        _write_structure(path, residues, args.output, args.format)
150
+
151
+
152
# Script entry point: run the command-line tool only when this module is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: RNApolis
3
- Version: 0.4.17
3
+ Version: 0.6.0
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -32,6 +32,7 @@ Dynamic: classifier
32
32
  Dynamic: description
33
33
  Dynamic: description-content-type
34
34
  Dynamic: home-page
35
+ Dynamic: license-file
35
36
  Dynamic: project-url
36
37
  Dynamic: requires-dist
37
38
  Dynamic: summary
@@ -0,0 +1,26 @@
1
+ rnapolis/aligner.py,sha256=oJ81FrjlEEzqJcYJdZUE1PrPjabIOT7j0idwAHXVQMI,3156
2
+ rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
3
+ rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
4
+ rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
5
+ rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
6
+ rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
7
+ rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
8
+ rnapolis/component_U.csv,sha256=8BUoU1m2YzGmi8_kw1xdpf3pucszHjFEtTex87CuXiE,2645
9
+ rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
10
+ rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5744659
11
+ rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
12
+ rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
13
+ rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
14
+ rnapolis/parser_v2.py,sha256=ltesVKBiIKk9JlM02ttTJzLm1g5MHdPzDgQTcl40GP8,16257
15
+ rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
16
+ rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
17
+ rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
18
+ rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
19
+ rnapolis/unifier.py,sha256=bXscX3lxeSxT4K1fm2UEURcU9_0JA0HdTbd8ZoHZFAY,5442
20
+ rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
21
+ rnapolis-0.6.0.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
22
+ rnapolis-0.6.0.dist-info/METADATA,sha256=TcGmjLlYH8jPvWJr48a2ce-UhIIl_dAO_wygm4ZPrKY,54537
23
+ rnapolis-0.6.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
24
+ rnapolis-0.6.0.dist-info/entry_points.txt,sha256=kS_Ji3_6UaomxkOaYpGHh4aZKaIh9CAfzoexbaS3y50,372
25
+ rnapolis-0.6.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
26
+ rnapolis-0.6.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,4 +1,5 @@
1
1
  [console_scripts]
2
+ aligner = rnapolis.aligner:main
2
3
  annotator = rnapolis.annotator:main
3
4
  clashfinder = rnapolis.clashfinder:main
4
5
  metareader = rnapolis.metareader:main
@@ -6,3 +7,4 @@ molecule-filter = rnapolis.molecule_filter:main
6
7
  motif-extractor = rnapolis.motif_extractor:main
7
8
  rfam-folder = rnapolis.rfam_folder:main
8
9
  transformer = rnapolis.transformer:main
10
+ unifier = rnapolis.unifier:main
@@ -1,18 +0,0 @@
1
- rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
2
- rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
3
- rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
4
- rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
5
- rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5744659
6
- rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
7
- rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
8
- rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
9
- rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
10
- rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
11
- rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
12
- rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
13
- RNApolis-0.4.17.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
14
- RNApolis-0.4.17.dist-info/METADATA,sha256=NXwscUxsO3lpMD3eukldViwH6JUSFlEC9ExXwirgfLM,54516
15
- RNApolis-0.4.17.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- RNApolis-0.4.17.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
17
- RNApolis-0.4.17.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
18
- RNApolis-0.4.17.dist-info/RECORD,,