RNApolis 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/parser_v2.py +807 -186
- rnapolis/splitter.py +128 -0
- rnapolis/unifier.py +20 -5
- {rnapolis-0.7.0.dist-info → rnapolis-0.8.1.dist-info}/METADATA +1 -1
- {rnapolis-0.7.0.dist-info → rnapolis-0.8.1.dist-info}/RECORD +9 -8
- {rnapolis-0.7.0.dist-info → rnapolis-0.8.1.dist-info}/entry_points.txt +1 -0
- {rnapolis-0.7.0.dist-info → rnapolis-0.8.1.dist-info}/WHEEL +0 -0
- {rnapolis-0.7.0.dist-info → rnapolis-0.8.1.dist-info}/licenses/LICENSE +0 -0
- {rnapolis-0.7.0.dist-info → rnapolis-0.8.1.dist-info}/top_level.txt +0 -0
rnapolis/splitter.py
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
import argparse
|
3
|
+
import os
|
4
|
+
import sys
|
5
|
+
|
6
|
+
from rnapolis.parser import is_cif
|
7
|
+
from rnapolis.parser_v2 import (
|
8
|
+
fit_to_pdb,
|
9
|
+
parse_cif_atoms,
|
10
|
+
parse_pdb_atoms,
|
11
|
+
write_cif,
|
12
|
+
write_pdb,
|
13
|
+
)
|
14
|
+
|
15
|
+
|
16
|
+
def main(argv=None):
    """Split a multi-model PDB or mmCIF file into one output file per model.

    The input format is detected with ``is_cif``; atoms are grouped by the
    model column of the parsed DataFrame and every group is written to
    ``<output>/<input-basename>_model_<model><ext>``.

    Args:
        argv: Optional list of command-line arguments. When ``None``,
            argparse reads ``sys.argv[1:]``, so the console-script entry
            point can keep calling ``main()`` without arguments.

    Exit status:
        1 if ``--format`` is invalid, the input file does not exist, parsing
        fails, or the model column is missing; 0 if the file has no atoms.
        Failures while converting or writing a single model are reported on
        stderr and the remaining models are still processed.
    """
    parser = argparse.ArgumentParser(
        description="Split a multi-model PDB or mmCIF file into separate files per model."
    )
    parser.add_argument("--output", "-o", help="Output directory", required=True)
    parser.add_argument(
        "--format",
        "-f",
        help="Output format (possible values: PDB, mmCIF, keep. Default: keep)",
        default="keep",
    )
    parser.add_argument("file", help="Input PDB or mmCIF file to split")
    args = parser.parse_args(argv)

    # Validate the requested format up front: it only depends on the
    # arguments, so there is no reason to parse a (possibly large) input
    # file before rejecting a typo.
    requested_format = args.format.upper()
    if requested_format not in ("KEEP", "PDB", "MMCIF"):
        print(
            f"Error: Invalid output format '{args.format}'. Choose PDB, mmCIF, or keep.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Check if input file exists
    if not os.path.exists(args.file):
        print(f"Error: Input file not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    # Read and parse the input file
    input_format = "mmCIF"
    try:
        with open(args.file) as f:
            if is_cif(f):
                atoms_df = parse_cif_atoms(f)
                model_column = "pdbx_PDB_model_num"
            else:
                atoms_df = parse_pdb_atoms(f)
                input_format = "PDB"
                model_column = "model"
    except Exception as e:
        print(f"Error parsing file {args.file}: {e}", file=sys.stderr)
        sys.exit(1)

    if atoms_df.empty:
        print(f"Warning: No atoms found in {args.file}", file=sys.stderr)
        sys.exit(0)

    # Check if model column exists
    if model_column not in atoms_df.columns:
        print(
            f"Error: Model column '{model_column}' not found in the parsed data from {args.file}.",
            file=sys.stderr,
        )
        print(
            "This might indicate an issue with the input file or the parser.",
            file=sys.stderr,
        )
        sys.exit(1)

    # "keep" means: write in the same format the input was detected as.
    output_format = input_format if requested_format == "KEEP" else requested_format

    # Ensure output directory exists
    os.makedirs(args.output, exist_ok=True)

    # Both values are the same for every model, so compute them once.
    base_name = os.path.splitext(os.path.basename(args.file))[0]
    ext = ".pdb" if output_format == "PDB" else ".cif"

    # Write each model to a separate file
    for model_num, model_df in atoms_df.groupby(model_column):
        # Work on a copy so setting attrs does not touch the grouped frame.
        model_df = model_df.copy()

        # Set the format attribute for the writer function
        model_df.attrs["format"] = input_format

        output_path = os.path.join(args.output, f"{base_name}_model_{model_num}{ext}")

        print(f"Writing model {model_num} to {output_path}...")

        try:
            if output_format == "PDB":
                # Only fit_to_pdb failures are reported as fitting errors;
                # a ValueError raised by a writer falls through to the
                # general handler below and is reported as a write error.
                try:
                    df_to_write = fit_to_pdb(model_df)
                except ValueError as e:
                    print(
                        f"Error fitting model {model_num} from {args.file} to PDB: {e}. Skipping model.",
                        file=sys.stderr,
                    )
                    continue
                write_pdb(df_to_write, output_path)
            else:  # mmCIF
                write_cif(model_df, output_path)
        except Exception as e:
            # Best effort: report the failure and move on to the next model.
            print(
                f"Error writing file {output_path} for model {model_num}: {e}",
                file=sys.stderr,
            )

    print("Splitting complete.")
|
125
|
+
|
126
|
+
|
127
|
+
if __name__ == "__main__":
|
128
|
+
main()
|
rnapolis/unifier.py
CHANGED
@@ -7,7 +7,13 @@ from collections import Counter
|
|
7
7
|
import pandas as pd
|
8
8
|
|
9
9
|
from rnapolis.parser import is_cif
|
10
|
-
from rnapolis.parser_v2 import
|
10
|
+
from rnapolis.parser_v2 import (
|
11
|
+
fit_to_pdb,
|
12
|
+
parse_cif_atoms,
|
13
|
+
parse_pdb_atoms,
|
14
|
+
write_cif,
|
15
|
+
write_pdb,
|
16
|
+
)
|
11
17
|
from rnapolis.tertiary_v2 import Structure
|
12
18
|
|
13
19
|
|
@@ -140,13 +146,22 @@ def main():
|
|
140
146
|
|
141
147
|
ext = ".pdb" if format == "PDB" else ".cif"
|
142
148
|
|
143
|
-
|
144
|
-
df = pd.concat([residue.atoms for residue in residues])
|
149
|
+
df = pd.concat([residue.atoms for residue in residues])
|
145
150
|
|
151
|
+
try:
|
146
152
|
if format == "PDB":
|
147
|
-
|
153
|
+
df_to_write = fit_to_pdb(df)
|
154
|
+
with open(f"{args.output}/{base}{ext}", "w") as f:
|
155
|
+
write_pdb(df_to_write, f)
|
148
156
|
else:
|
149
|
-
|
157
|
+
with open(f"{args.output}/{base}{ext}", "w") as f:
|
158
|
+
write_cif(df, f)
|
159
|
+
except ValueError as e:
|
160
|
+
print(
|
161
|
+
f"Error processing {path} for PDB output: {e}. Skipping file.",
|
162
|
+
file=sys.stderr,
|
163
|
+
)
|
164
|
+
continue
|
150
165
|
|
151
166
|
|
152
167
|
if __name__ == "__main__":
|
@@ -12,16 +12,17 @@ rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5
|
|
12
12
|
rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
|
13
13
|
rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
|
14
14
|
rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
|
15
|
-
rnapolis/parser_v2.py,sha256=
|
15
|
+
rnapolis/parser_v2.py,sha256=qG6CO3or7zmuJu368g9Nzokiqdeip4yjD14F163uH6w,40618
|
16
16
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
17
|
+
rnapolis/splitter.py,sha256=x-Zn21mkiMgvYPptUFD9BbdNIvoaM6b8GzGf6uYXEwE,4052
|
17
18
|
rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
|
18
19
|
rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
|
19
20
|
rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
|
20
|
-
rnapolis/unifier.py,sha256=
|
21
|
+
rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
|
21
22
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
22
|
-
rnapolis-0.
|
23
|
-
rnapolis-0.
|
24
|
-
rnapolis-0.
|
25
|
-
rnapolis-0.
|
26
|
-
rnapolis-0.
|
27
|
-
rnapolis-0.
|
23
|
+
rnapolis-0.8.1.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
24
|
+
rnapolis-0.8.1.dist-info/METADATA,sha256=NOg9-s2n313HElku8z06JiBvEhPf6oV9RR7ur20hwys,54537
|
25
|
+
rnapolis-0.8.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
26
|
+
rnapolis-0.8.1.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
|
27
|
+
rnapolis-0.8.1.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
28
|
+
rnapolis-0.8.1.dist-info/RECORD,,
|
@@ -7,5 +7,6 @@ metareader = rnapolis.metareader:main
|
|
7
7
|
molecule-filter = rnapolis.molecule_filter:main
|
8
8
|
motif-extractor = rnapolis.motif_extractor:main
|
9
9
|
rfam-folder = rnapolis.rfam_folder:main
|
10
|
+
splitter = rnapolis.splitter:main
|
10
11
|
transformer = rnapolis.transformer:main
|
11
12
|
unifier = rnapolis.unifier:main
|
File without changes
|
File without changes
|
File without changes
|