RNApolis 0.4.17__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/aligner.py +103 -0
- rnapolis/clashfinder.py +4 -4
- rnapolis/component_A.csv +38 -0
- rnapolis/component_C.csv +36 -0
- rnapolis/component_G.csv +39 -0
- rnapolis/component_U.csv +35 -0
- rnapolis/parser_v2.py +506 -0
- rnapolis/tertiary_v2.py +651 -0
- rnapolis/unifier.py +153 -0
- {RNApolis-0.4.17.dist-info → rnapolis-0.6.0.dist-info}/METADATA +3 -2
- rnapolis-0.6.0.dist-info/RECORD +26 -0
- {RNApolis-0.4.17.dist-info → rnapolis-0.6.0.dist-info}/WHEEL +1 -1
- {RNApolis-0.4.17.dist-info → rnapolis-0.6.0.dist-info}/entry_points.txt +2 -0
- RNApolis-0.4.17.dist-info/RECORD +0 -18
- {RNApolis-0.4.17.dist-info → rnapolis-0.6.0.dist-info/licenses}/LICENSE +0 -0
- {RNApolis-0.4.17.dist-info → rnapolis-0.6.0.dist-info}/top_level.txt +0 -0
rnapolis/unifier.py
ADDED
@@ -0,0 +1,153 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
import argparse
|
3
|
+
import os
|
4
|
+
import sys
|
5
|
+
from collections import Counter
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
|
9
|
+
from rnapolis.parser import is_cif
|
10
|
+
from rnapolis.parser_v2 import parse_cif_atoms, parse_pdb_atoms, write_cif, write_pdb
|
11
|
+
from rnapolis.tertiary_v2 import Structure
|
12
|
+
|
13
|
+
|
14
|
+
def load_components():
    """Read the bundled per-nucleotide component tables.

    For each of the residues A, C, G and U the file ``component_<X>.csv``
    located next to this module is parsed with pandas.

    Returns:
        dict mapping the one-letter residue name to its ``DataFrame``.
    """
    package_dir = os.path.abspath(os.path.dirname(__file__))
    return {
        letter: pd.read_csv(os.path.join(package_dir, f"component_{letter}.csv"))
        for letter in "ACGU"
    }
|
22
|
+
|
23
|
+
|
24
|
+
def _build_component_rules(components):
    """Precompute the per-residue-type renaming map and canonical atom order.

    Args:
        components: dict of residue name -> component ``DataFrame`` with
            ``atom_id`` and ``alt_atom_id`` columns.

    Returns:
        dict of residue name -> ``(renames, heavy_atoms, order)`` where
        ``renames`` maps alternative atom names to standard ones,
        ``heavy_atoms`` is the series of standard names not starting with
        ``"H"`` and ``order`` maps each of those names to its position.
    """
    rules = {}
    for name, component in components.items():
        renames = dict(zip(component["alt_atom_id"], component["atom_id"]))
        heavy_atoms = component["atom_id"]
        heavy_atoms = heavy_atoms[~heavy_atoms.str.startswith("H")]
        order = {atom: idx for idx, atom in enumerate(heavy_atoms.tolist())}
        rules[name] = (renames, heavy_atoms, order)
    return rules


def _normalize_residue(residue, rule):
    """Rename, filter and reorder the atoms of one residue in place."""
    renames, heavy_atoms, order = rule
    column = "name" if residue.format == "PDB" else "auth_atom_id"

    # Replace alternative name with standard name
    residue.atoms[column] = residue.atoms[column].replace(renames)
    # Leave only standard, non-hydrogen atoms
    residue.atoms = residue.atoms[residue.atoms[column].isin(heavy_atoms)]
    # Reorder atoms
    residue.atoms = residue.atoms.sort_values(
        by=[column], key=lambda col: col.map(order)
    )


def _load_residues(path, rules):
    """Parse one PDB/mmCIF file and return its normalized A/C/G/U residues."""
    with open(path) as f:
        if is_cif(f):
            atoms = parse_cif_atoms(f)
        else:
            atoms = parse_pdb_atoms(f)

    residues = []
    for residue in Structure(atoms).residues:
        # Exact key lookup: a substring test against "ACGU" would also accept
        # names such as "" or "AC" and then fail on the component lookup.
        rule = rules.get(residue.residue_name)
        if rule is None:
            continue
        _normalize_residue(residue, rule)
        residues.append(residue)
    return residues


def _validate_against_reference(structures):
    """Exit with status 1 unless every structure matches the first one.

    Both the number of residues and the residue names, position by position,
    must agree with the first (reference) structure.
    """
    ref_path, ref_residues = structures[0]

    for path, residues in structures:
        # Validity check 1: residue count must be equal
        if len(residues) != len(ref_residues):
            print(
                f"Number of residues in {path} does not match {ref_path}, cannot continue"
            )
            sys.exit(1)

        # Validity check 2: residue names must be equal
        for residue, ref_residue in zip(residues, ref_residues):
            if residue.residue_name != ref_residue.residue_name:
                print(
                    f"Residue {str(residue)} in {path} does not match {str(ref_residue)} in {ref_path}, cannot continue"
                )
                sys.exit(1)


def _drop_incomplete_residues(structures):
    """Remove, from all structures, positions whose atom counts disagree.

    A position is dropped when any structure has a different number of atoms
    there than the reference (first) structure. The mismatches are collected
    over every structure before anything is deleted, so all structures lose
    exactly the same positions.
    """
    ref_path, ref_residues = structures[0]

    # Find residues with different number of atoms
    residues_to_remove = set()
    for path, residues in structures:
        for i, (residue, ref_residue) in enumerate(zip(residues, ref_residues)):
            if len(residue.atoms) != len(ref_residue.atoms):
                print(
                    f"Number of atoms in {str(residue)} in {path} does not match {str(ref_residue)} in {ref_path}, will unify this"
                )
                residues_to_remove.add(i)

    # Remove residues with different number of atoms (highest index first so
    # that earlier deletions do not shift the remaining indices)
    for _, residues in structures:
        for i in sorted(residues_to_remove, reverse=True):
            del residues[i]


def _unify_identifiers(structures):
    """Give each residue position the most common identifier in all structures."""
    # Find most common residue identifiers for each residue
    n = len(structures[0][1])
    counters = [Counter() for _ in range(n)]
    for _, residues in structures:
        for i, residue in enumerate(residues):
            counters[i].update(
                [(residue.chain_id, residue.residue_number, residue.insertion_code)]
            )

    # If any residue has different identifiers, use the most common one in all structures
    for i, counter in enumerate(counters):
        (chain_id, residue_number, insertion_code), count = counter.most_common(1)[0]
        if count != len(structures):
            print(
                f"Residue {i + 1} has different identifiers in different structures, will unify this"
            )
            for _, residues in structures:
                residue = residues[i]
                residue.chain_id = chain_id
                residue.residue_number = residue_number
                residue.insertion_code = insertion_code


def _write_structures(structures, output_dir, requested_format):
    """Write every structure to ``output_dir`` as PDB or mmCIF.

    With ``requested_format == "keep"`` the format recorded in the atoms'
    ``attrs["format"]`` is used. Any value other than ``"PDB"`` results in an
    mmCIF file.
    """
    os.makedirs(output_dir, exist_ok=True)

    for path, residues in structures:
        base, _ = os.path.splitext(os.path.basename(path))

        if requested_format == "keep":
            output_format = residues[0].atoms.attrs["format"]
        else:
            output_format = requested_format

        ext = ".pdb" if output_format == "PDB" else ".cif"

        with open(os.path.join(output_dir, f"{base}{ext}"), "w") as f:
            df = pd.concat([residue.atoms for residue in residues])

            if output_format == "PDB":
                write_pdb(df, f)
            else:
                write_cif(df, f)


def main():
    """Main function to run the unifier tool.

    Reads the given PDB/mmCIF files, keeps only standard heavy atoms of
    A/C/G/U residues in a canonical order, drops residue positions whose atom
    counts differ between files, unifies residue identifiers and writes the
    results to the output directory. Exits with status 1 when the files
    cannot be unified.
    """
    parser = argparse.ArgumentParser(
        description="Unify content of a set of PDB or mmCIF files."
    )
    parser.add_argument("--output", "-o", help="Output directory", required=True)
    parser.add_argument(
        "--format",
        "-f",
        help="Output format (possible values: PDB, mmCIF, keep. Default: keep)",
        default="keep",
    )
    parser.add_argument("files", nargs="+", help="PDB or mmCIF files to compare")
    args = parser.parse_args()

    rules = _build_component_rules(load_components())
    structures = [(path, _load_residues(path, rules)) for path in args.files]

    _validate_against_reference(structures)
    _drop_incomplete_residues(structures)

    # Without any residue there is neither a format to keep nor data to write
    if not structures[0][1]:
        print("No residues left after unification, cannot continue")
        sys.exit(1)

    _unify_identifiers(structures)
    _write_structures(structures, args.output, args.format)


if __name__ == "__main__":
    main()
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: RNApolis
|
3
|
-
Version: 0.4.17
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: A Python library containing RNA-related bioinformatics functions and classes
|
5
5
|
Home-page: https://github.com/tzok/rnapolis-py
|
6
6
|
Author: Tomasz Zok
|
@@ -32,6 +32,7 @@ Dynamic: classifier
|
|
32
32
|
Dynamic: description
|
33
33
|
Dynamic: description-content-type
|
34
34
|
Dynamic: home-page
|
35
|
+
Dynamic: license-file
|
35
36
|
Dynamic: project-url
|
36
37
|
Dynamic: requires-dist
|
37
38
|
Dynamic: summary
|
@@ -0,0 +1,26 @@
|
|
1
|
+
rnapolis/aligner.py,sha256=oJ81FrjlEEzqJcYJdZUE1PrPjabIOT7j0idwAHXVQMI,3156
|
2
|
+
rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
|
3
|
+
rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
|
4
|
+
rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
|
5
|
+
rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
|
6
|
+
rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
|
7
|
+
rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
|
8
|
+
rnapolis/component_U.csv,sha256=8BUoU1m2YzGmi8_kw1xdpf3pucszHjFEtTex87CuXiE,2645
|
9
|
+
rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
|
10
|
+
rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5744659
|
11
|
+
rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
|
12
|
+
rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
|
13
|
+
rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
|
14
|
+
rnapolis/parser_v2.py,sha256=ltesVKBiIKk9JlM02ttTJzLm1g5MHdPzDgQTcl40GP8,16257
|
15
|
+
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
16
|
+
rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
|
17
|
+
rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
|
18
|
+
rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
|
19
|
+
rnapolis/unifier.py,sha256=bXscX3lxeSxT4K1fm2UEURcU9_0JA0HdTbd8ZoHZFAY,5442
|
20
|
+
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
21
|
+
rnapolis-0.6.0.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
22
|
+
rnapolis-0.6.0.dist-info/METADATA,sha256=TcGmjLlYH8jPvWJr48a2ce-UhIIl_dAO_wygm4ZPrKY,54537
|
23
|
+
rnapolis-0.6.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
24
|
+
rnapolis-0.6.0.dist-info/entry_points.txt,sha256=kS_Ji3_6UaomxkOaYpGHh4aZKaIh9CAfzoexbaS3y50,372
|
25
|
+
rnapolis-0.6.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
26
|
+
rnapolis-0.6.0.dist-info/RECORD,,
|
@@ -1,4 +1,5 @@
|
|
1
1
|
[console_scripts]
|
2
|
+
aligner = rnapolis.aligner:main
|
2
3
|
annotator = rnapolis.annotator:main
|
3
4
|
clashfinder = rnapolis.clashfinder:main
|
4
5
|
metareader = rnapolis.metareader:main
|
@@ -6,3 +7,4 @@ molecule-filter = rnapolis.molecule_filter:main
|
|
6
7
|
motif-extractor = rnapolis.motif_extractor:main
|
7
8
|
rfam-folder = rnapolis.rfam_folder:main
|
8
9
|
transformer = rnapolis.transformer:main
|
10
|
+
unifier = rnapolis.unifier:main
|
RNApolis-0.4.17.dist-info/RECORD
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
|
2
|
-
rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
|
3
|
-
rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
|
4
|
-
rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
|
5
|
-
rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5744659
|
6
|
-
rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
|
7
|
-
rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
|
8
|
-
rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
|
9
|
-
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
10
|
-
rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
|
11
|
-
rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
|
12
|
-
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
13
|
-
RNApolis-0.4.17.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
14
|
-
RNApolis-0.4.17.dist-info/METADATA,sha256=NXwscUxsO3lpMD3eukldViwH6JUSFlEC9ExXwirgfLM,54516
|
15
|
-
RNApolis-0.4.17.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
16
|
-
RNApolis-0.4.17.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
17
|
-
RNApolis-0.4.17.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
18
|
-
RNApolis-0.4.17.dist-info/RECORD,,
|
File without changes
|
File without changes
|