RNApolis 0.2.1__tar.gz → 0.3.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30) hide show
  1. {RNApolis-0.2.1 → RNApolis-0.3.1}/PKG-INFO +39 -1
  2. RNApolis-0.2.1/src/RNApolis.egg-info/PKG-INFO → RNApolis-0.3.1/README.md +35 -25
  3. {RNApolis-0.2.1 → RNApolis-0.3.1}/setup.py +5 -1
  4. RNApolis-0.2.1/README.md → RNApolis-0.3.1/src/RNApolis.egg-info/PKG-INFO +63 -0
  5. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/RNApolis.egg-info/SOURCES.txt +1 -0
  6. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/RNApolis.egg-info/entry_points.txt +1 -0
  7. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/RNApolis.egg-info/requires.txt +3 -0
  8. RNApolis-0.3.1/src/rnapolis/rfam_folder.py +294 -0
  9. {RNApolis-0.2.1 → RNApolis-0.3.1}/LICENSE +0 -0
  10. {RNApolis-0.2.1 → RNApolis-0.3.1}/pyproject.toml +0 -0
  11. {RNApolis-0.2.1 → RNApolis-0.3.1}/setup.cfg +0 -0
  12. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/RNApolis.egg-info/dependency_links.txt +0 -0
  13. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/RNApolis.egg-info/top_level.txt +0 -0
  14. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/annotator.py +0 -0
  15. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/clashfinder.py +0 -0
  16. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/common.py +0 -0
  17. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/metareader.py +0 -0
  18. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/molecule_filter.py +0 -0
  19. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/motif_extractor.py +0 -0
  20. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/parser.py +0 -0
  21. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/tertiary.py +0 -0
  22. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/transformer.py +0 -0
  23. {RNApolis-0.2.1 → RNApolis-0.3.1}/src/rnapolis/util.py +0 -0
  24. {RNApolis-0.2.1 → RNApolis-0.3.1}/tests/test_annotator.py +0 -0
  25. {RNApolis-0.2.1 → RNApolis-0.3.1}/tests/test_bugfixes.py +0 -0
  26. {RNApolis-0.2.1 → RNApolis-0.3.1}/tests/test_common.py +0 -0
  27. {RNApolis-0.2.1 → RNApolis-0.3.1}/tests/test_metareader.py +0 -0
  28. {RNApolis-0.2.1 → RNApolis-0.3.1}/tests/test_parser.py +0 -0
  29. {RNApolis-0.2.1 → RNApolis-0.3.1}/tests/test_quadruplexes.py +0 -0
  30. {RNApolis-0.2.1 → RNApolis-0.3.1}/tests/test_tertiary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.2.1
3
+ Version: 0.3.1
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -15,13 +15,16 @@ Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
+ Requires-Dist: appdirs
18
19
  Requires-Dist: graphviz
19
20
  Requires-Dist: mmcif
20
21
  Requires-Dist: numpy
21
22
  Requires-Dist: ordered-set
22
23
  Requires-Dist: orjson
23
24
  Requires-Dist: pulp
25
+ Requires-Dist: requests
24
26
  Requires-Dist: scipy
27
+ Requires-Dist: viennarna
25
28
 
26
29
  # RNApolis
27
30
 
@@ -2133,3 +2136,38 @@ To use `transformer.py`, specify the path to your input mmCIF file and the desir
2133
2136
  - `--copy-to COPY_TO`: Indicate the column name to copy data to (e.g., `auth_asym_id`).
2134
2137
 
2135
2138
  For additional guidance, use `-h` or `--help`.
2139
+
2140
+ ### `rfam-folder`
2141
+
2142
+ `rfam-folder` is a command-line tool for generating consensus secondary structures for RNA sequences. This tool can process a single RNA sequence or multiple sequences from a FASTA file. It offers the flexibility to specify an Rfam family for targeted analysis, control the folding of the secondary structure, and set a limit on the number of structures generated.
2143
+
2144
+ **Important!** You need to have [Infernal software](http://eddylab.org/infernal/) installed for this script to run.
2145
+
2146
+ #### Usage:
2147
+
2148
+ The general usage pattern for rfam_folder.py is as follows:
2149
+
2150
+ ```bash
2151
+ usage: rfam_folder.py [-h] [--family FAMILY] [--no-fold] [--count COUNT] sequence
2152
+ ```
2153
+
2154
+ Positional Arguments:
2155
+
2156
+ - sequence: An RNA sequence directly or a path to a FASTA file containing one or more sequences.
2157
+
2158
+ Options:
2159
+
2160
+ - `--family FAMILY`: (Optional) Specify the name of the Rfam family to use. If not given, the entire Rfam database is searched for the sequence.
2161
+ - `--no-fold`: (Optional) Disable folding of the consensus secondary structure by RNAfold with constraints.
2162
+ - `--count COUNT`: (Optional) Set the maximum number of consensus secondary structures to generate per sequence, with a default of 1.
2163
+
2164
+ #### Examples
2165
+
2166
+ Generate a consensus structure for a single RNA sequence given a specific Rfam family:
2167
+
2168
+ ```
2169
+ $ rfam-folder AUCGUUCACCUCAUAAAUUGAGUGAGACGGAAGUAGGUUAAAACCGAAGGAACGCAGU --family RF01739
2170
+ >header
2171
+ AUCGUUCACCUCAUAAAUUGAGUGAGACGGAAGUAGGUUAAAACCGAAGGAACGCAGU
2172
+ ..(((((..(((((.......)))))((....)).(((....)))....)))))....
2173
+ ```
@@ -1,28 +1,3 @@
1
- Metadata-Version: 2.1
2
- Name: RNApolis
3
- Version: 0.2.1
4
- Summary: A Python library containing RNA-related bioinformatics functions and classes
5
- Home-page: https://github.com/tzok/rnapolis-py
6
- Author: Tomasz Zok
7
- Author-email: tomasz.zok@cs.put.poznan.pl
8
- Project-URL: Bug Tracker, https://github.com/tzok/rnapolis-py/issues
9
- Classifier: Development Status :: 5 - Production/Stable
10
- Classifier: Environment :: Console
11
- Classifier: Intended Audience :: Science/Research
12
- Classifier: License :: OSI Approved :: MIT License
13
- Classifier: Operating System :: OS Independent
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
- Description-Content-Type: text/markdown
17
- License-File: LICENSE
18
- Requires-Dist: graphviz
19
- Requires-Dist: mmcif
20
- Requires-Dist: numpy
21
- Requires-Dist: ordered-set
22
- Requires-Dist: orjson
23
- Requires-Dist: pulp
24
- Requires-Dist: scipy
25
-
26
1
  # RNApolis
27
2
 
28
3
  A Python library and utilities containing RNA-related bioinformatics functions and classes.
@@ -2133,3 +2108,38 @@ To use `transformer.py`, specify the path to your input mmCIF file and the desir
2133
2108
  - `--copy-to COPY_TO`: Indicate the column name to copy data to (e.g., `auth_asym_id`).
2134
2109
 
2135
2110
  For additional guidance, use `-h` or `--help`.
2111
+
2112
+ ### `rfam-folder`
2113
+
2114
+ `rfam-folder` is a command-line tool for generating consensus secondary structures for RNA sequences. This tool can process a single RNA sequence or multiple sequences from a FASTA file. It offers the flexibility to specify an Rfam family for targeted analysis, control the folding of the secondary structure, and set a limit on the number of structures generated.
2115
+
2116
+ **Important!** You need to have [Infernal software](http://eddylab.org/infernal/) installed for this script to run.
2117
+
2118
+ #### Usage:
2119
+
2120
+ The general usage pattern for rfam_folder.py is as follows:
2121
+
2122
+ ```bash
2123
+ usage: rfam_folder.py [-h] [--family FAMILY] [--no-fold] [--count COUNT] sequence
2124
+ ```
2125
+
2126
+ Positional Arguments:
2127
+
2128
+ - sequence: An RNA sequence directly or a path to a FASTA file containing one or more sequences.
2129
+
2130
+ Options:
2131
+
2132
+ - `--family FAMILY`: (Optional) Specify the name of the Rfam family to use. If not given, the entire Rfam database is searched for the sequence.
2133
+ - `--no-fold`: (Optional) Disable folding of the consensus secondary structure by RNAfold with constraints.
2134
+ - `--count COUNT`: (Optional) Set the maximum number of consensus secondary structures to generate per sequence, with a default of 1.
2135
+
2136
+ #### Examples
2137
+
2138
+ Generate a consensus structure for a single RNA sequence given a specific Rfam family:
2139
+
2140
+ ```
2141
+ $ rfam-folder AUCGUUCACCUCAUAAAUUGAGUGAGACGGAAGUAGGUUAAAACCGAAGGAACGCAGU --family RF01739
2142
+ >header
2143
+ AUCGUUCACCUCAUAAAUUGAGUGAGACGGAAGUAGGUUAAAACCGAAGGAACGCAGU
2144
+ ..(((((..(((((.......)))))((....)).(((....)))....)))))....
2145
+ ```
@@ -5,7 +5,7 @@ with open("README.md") as f:
5
5
 
6
6
  setup(
7
7
  name="RNApolis",
8
- version="0.2.1",
8
+ version="0.3.1",
9
9
  packages=["rnapolis"],
10
10
  package_dir={"": "src"},
11
11
  author="Tomasz Zok",
@@ -32,15 +32,19 @@ setup(
32
32
  "molecule-filter=rnapolis.molecule_filter:main",
33
33
  "motif-extractor=rnapolis.motif_extractor:main",
34
34
  "transformer=rnapolis.transformer:main",
35
+ "rfam-folder=rnapolis.rfam_folder:main",
35
36
  ]
36
37
  },
37
38
  install_requires=[
39
+ "appdirs",
38
40
  "graphviz",
39
41
  "mmcif",
40
42
  "numpy",
41
43
  "ordered-set",
42
44
  "orjson",
43
45
  "pulp",
46
+ "requests",
44
47
  "scipy",
48
+ "viennarna",
45
49
  ],
46
50
  )
@@ -1,3 +1,31 @@
1
+ Metadata-Version: 2.1
2
+ Name: RNApolis
3
+ Version: 0.3.1
4
+ Summary: A Python library containing RNA-related bioinformatics functions and classes
5
+ Home-page: https://github.com/tzok/rnapolis-py
6
+ Author: Tomasz Zok
7
+ Author-email: tomasz.zok@cs.put.poznan.pl
8
+ Project-URL: Bug Tracker, https://github.com/tzok/rnapolis-py/issues
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: appdirs
19
+ Requires-Dist: graphviz
20
+ Requires-Dist: mmcif
21
+ Requires-Dist: numpy
22
+ Requires-Dist: ordered-set
23
+ Requires-Dist: orjson
24
+ Requires-Dist: pulp
25
+ Requires-Dist: requests
26
+ Requires-Dist: scipy
27
+ Requires-Dist: viennarna
28
+
1
29
  # RNApolis
2
30
 
3
31
  A Python library and utilities containing RNA-related bioinformatics functions and classes.
@@ -2108,3 +2136,38 @@ To use `transformer.py`, specify the path to your input mmCIF file and the desir
2108
2136
  - `--copy-to COPY_TO`: Indicate the column name to copy data to (e.g., `auth_asym_id`).
2109
2137
 
2110
2138
  For additional guidance, use `-h` or `--help`.
2139
+
2140
+ ### `rfam-folder`
2141
+
2142
+ `rfam-folder` is a command-line tool for generating consensus secondary structures for RNA sequences. This tool can process a single RNA sequence or multiple sequences from a FASTA file. It offers the flexibility to specify an Rfam family for targeted analysis, control the folding of the secondary structure, and set a limit on the number of structures generated.
2143
+
2144
+ **Important!** You need to have [Infernal software](http://eddylab.org/infernal/) installed for this script to run.
2145
+
2146
+ #### Usage:
2147
+
2148
+ The general usage pattern for rfam_folder.py is as follows:
2149
+
2150
+ ```bash
2151
+ usage: rfam_folder.py [-h] [--family FAMILY] [--no-fold] [--count COUNT] sequence
2152
+ ```
2153
+
2154
+ Positional Arguments:
2155
+
2156
+ - sequence: An RNA sequence directly or a path to a FASTA file containing one or more sequences.
2157
+
2158
+ Options:
2159
+
2160
+ - `--family FAMILY`: (Optional) Specify the name of the Rfam family to use. If not given, the entire Rfam database is searched for the sequence.
2161
+ - `--no-fold`: (Optional) Disable folding of the consensus secondary structure by RNAfold with constraints.
2162
+ - `--count COUNT`: (Optional) Set the maximum number of consensus secondary structures to generate per sequence, with a default of 1.
2163
+
2164
+ #### Examples
2165
+
2166
+ Generate a consensus structure for a single RNA sequence given specific Rfam family:
2167
+
2168
+ ```
2169
+ $ rfam-folder AUCGUUCACCUCAUAAAUUGAGUGAGACGGAAGUAGGUUAAAACCGAAGGAACGCAGU --family RF01739
2170
+ >header
2171
+ AUCGUUCACCUCAUAAAUUGAGUGAGACGGAAGUAGGUUAAAACCGAAGGAACGCAGU
2172
+ ..(((((..(((((.......)))))((....)).(((....)))....)))))....
2173
+ ```
@@ -15,6 +15,7 @@ src/rnapolis/metareader.py
15
15
  src/rnapolis/molecule_filter.py
16
16
  src/rnapolis/motif_extractor.py
17
17
  src/rnapolis/parser.py
18
+ src/rnapolis/rfam_folder.py
18
19
  src/rnapolis/tertiary.py
19
20
  src/rnapolis/transformer.py
20
21
  src/rnapolis/util.py
@@ -4,4 +4,5 @@ clashfinder = rnapolis.clashfinder:main
4
4
  metareader = rnapolis.metareader:main
5
5
  molecule-filter = rnapolis.molecule_filter:main
6
6
  motif-extractor = rnapolis.motif_extractor:main
7
+ rfam-folder = rnapolis.rfam_folder:main
7
8
  transformer = rnapolis.transformer:main
@@ -1,7 +1,10 @@
1
+ appdirs
1
2
  graphviz
2
3
  mmcif
3
4
  numpy
4
5
  ordered-set
5
6
  orjson
6
7
  pulp
8
+ requests
7
9
  scipy
10
+ viennarna
@@ -0,0 +1,294 @@
1
+ #! /usr/bin/env python
2
+ import argparse
3
+ import gzip
4
+ import os
5
+ import re
6
+ import shutil
7
+ import subprocess
8
+ import tempfile
9
+ from typing import List
10
+
11
+ import appdirs
12
+ import requests
13
+ import RNA
14
+
15
+ from rnapolis.common import BpSeq, DotBracket
16
+
17
+ COMBINED_CM = "https://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/Rfam.cm.gz"
18
+ SEPARATE_CM = "https://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/Rfam.tar.gz"
19
+
20
+
class FASTA:
    """A single FASTA record: a header and the sequence that belongs to it."""

    # Text of the record's header line, without the leading ">".
    header: str
    # Sequence of the record as one string.
    sequence: str

    def __init__(self, header: str, sequence: str):
        self.header, self.sequence = header, sequence

    def __str__(self):
        """Format the record as two lines: ">" + header, then the sequence."""
        return ">{}\n{}".format(self.header, self.sequence)
31
+
32
+
def parse_fasta(fasta_path: str) -> List[FASTA]:
    """
    Read FASTA entries from a file.

    Each record starts at a ">" character. The first line of a record is its
    header, and all remaining lines are concatenated into the sequence with
    every whitespace character removed. Text before the first ">" is ignored,
    and a ">" that is followed by nothing at all (e.g. a stray ">" at the very
    end of the file) is skipped instead of raising IndexError.

    Args:
        fasta_path (str): The path to the FASTA file.

    Returns:
        List[FASTA]: A list of FASTA objects representing the entries in the file.
    """
    with open(fasta_path) as f:
        content = f.read()

    fastas = []

    # Element 0 of the split is whatever preceded the first ">", not a record.
    for entry in content.split(">")[1:]:
        lines = entry.splitlines()

        if not lines:
            # Nothing followed this ">", so there is no header line to read.
            continue

        header = lines[0]
        # str.split() without arguments drops spaces, tabs and stray "\r", so
        # wrapped or space-grouped sequence lines collapse into one string.
        sequence = "".join("".join(lines[1:]).split())
        fastas.append(FASTA(header, sequence))

    return fastas
56
+
57
+
def _download(url: str, destination: str) -> None:
    """Stream ``url`` into ``destination``, creating the file only on success."""
    partial_path = destination + ".part"

    try:
        with requests.get(url, stream=True, timeout=60) as response:
            # Fail loudly on 4xx/5xx instead of caching an error page as the archive.
            response.raise_for_status()

            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        # Rename only after the full body was written, so an interrupted
        # download never leaves a truncated file under the final name.
        os.replace(partial_path, destination)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)


def ensure_cm(family: str = None):
    """
    Make sure a pressed covariance model is available locally and return its path.

    Files are kept in ``appdirs.user_data_dir("rnapolis")``. Without ``family``
    the combined model (``COMBINED_CM``) is downloaded and decompressed to
    ``Rfam.cm``. With ``family`` the archive of separate models
    (``SEPARATE_CM``) is downloaded and unpacked, and ``<family>.cm`` is used.
    Each step is skipped when its output file already exists. Finally
    ``cmpress`` is run on the model unless ``<model>.i1m`` is already present.

    Args:
        family (str): Name of the model file (without ``.cm``) to use, or None
            to use the combined model.

    Returns:
        str: Path to the ``.cm`` file.

    Raises:
        RuntimeError: If ``<family>.cm`` does not exist after unpacking.
        requests.HTTPError: If the download answers with an error status.
        subprocess.CalledProcessError: If ``cmpress`` exits with a non-zero code.
    """
    data_dir = appdirs.user_data_dir("rnapolis")
    # exist_ok avoids failing when another process creates the directory
    # between a separate existence check and the creation.
    os.makedirs(data_dir, exist_ok=True)

    if family is None:
        cm_gz_path = os.path.join(data_dir, "Rfam.cm.gz")
        cm_path = os.path.join(data_dir, "Rfam.cm")

        if not os.path.exists(cm_gz_path):
            _download(COMBINED_CM, cm_gz_path)

        if not os.path.exists(cm_path):
            partial_path = cm_path + ".part"

            try:
                # Copy in chunks; the decompressed model is too large to be
                # worth holding in memory at once.
                with gzip.open(cm_gz_path, "rb") as f_in, open(
                    partial_path, "wb"
                ) as f_out:
                    shutil.copyfileobj(f_in, f_out)

                os.replace(partial_path, cm_path)
            finally:
                if os.path.exists(partial_path):
                    os.remove(partial_path)
    else:
        cm_gz_path = os.path.join(data_dir, "Rfam.tar.gz")
        cm_path = os.path.join(data_dir, f"{family}.cm")

        if not os.path.exists(cm_gz_path):
            _download(SEPARATE_CM, cm_gz_path)

        if not os.path.exists(cm_path):
            shutil.unpack_archive(cm_gz_path, data_dir)

        if not os.path.exists(cm_path):
            raise RuntimeError(
                f"Failed to find covariance model for {family} from Rfam."
            )

    # The ".i1m" file next to the model is used as the marker that cmpress
    # has already been run on it.
    if not os.path.exists(cm_path + ".i1m"):
        subprocess.run(["cmpress", cm_path], check=True, capture_output=True)

    return cm_path
97
+
98
+
# Complement of every nucleotide code accepted in an aligned sequence, plus "."
# which stands for the wildcard positions inserted for "*[n]*" placeholders.
# https://en.wikipedia.org/wiki/Nucleic_acid_notation
_COMPLEMENTARY = {
    "A": "U",
    "C": "G",
    "G": "C",
    "U": "A",
    "W": "W",
    "S": "S",
    "M": "K",
    "K": "M",
    "R": "Y",
    "Y": "R",
    "B": "V",
    "D": "H",
    "H": "D",
    "V": "B",
    "N": "N",
    ".": ".",
}

# Consensus-structure characters that are rewritten to "." before conversion.
_UNPAIRED_TO_DOT = str.maketrans(":-_,~", ".....")


def analyze_cmsearch(cmsearch: str, fasta: FASTA, count: int = 1):
    """
    Extract sequences and consensus structures from the text output of cmsearch.

    The output is split into hit sections, each starting at a line that begins
    with ">>". In every section the line ending with " CS" provides the
    structure, the line three below it provides the aligned sequence with its
    first and last coordinates, and an optional line ending with " NC" marks
    columns whose structure character is replaced by ".". Placeholders such as
    "*[ 5]*" are expanded to that many "." in both strings, and gap columns
    ("-" in the sequence) are removed from both. The sequence is then used as a
    regular expression ("." matches any residue) to recover the real residues
    from ``fasta.sequence``. Hits whose structure consists only of "." are
    skipped.

    Args:
        cmsearch (str): Text printed by cmsearch.
        fasta (FASTA): The record that was searched; its sequence is used to
            fill in positions that the alignment does not show.
        count (int): Stop after this many results have been collected.

    Returns:
        list: ``[sequence, structure]`` lists, in the order of the hits.

    Raises:
        RuntimeError: If a hit section contains no line ending with " CS".
        AssertionError: If the section does not have the expected layout or the
            aligned sequence cannot be found in ``fasta.sequence``.
    """
    result = []
    lines = cmsearch.splitlines()
    begins = [index for index, line in enumerate(lines) if line.startswith(">>")]
    # A section runs up to the next ">>" line, the last one to the end of output.
    ends = begins[1:] + [len(lines)]

    for begin, end in zip(begins, ends):
        nc_index, cs_index = None, None

        for index in range(begin, end):
            if lines[index].endswith(" NC"):
                nc_index = index
            if lines[index].endswith(" CS"):
                cs_index = index

        if cs_index is None:
            # Without this check the lookup below fails with an opaque
            # "list indices must be integers" TypeError.
            raise RuntimeError(
                f"No consensus structure (CS) line found for hit: {lines[begin]}"
            )

        assert len(lines[cs_index].split()) == 2

        structure = lines[cs_index]
        sequence = lines[cs_index + 3]

        hit_match = re.match(r"\s*.+?\s+(\d+)\s+.+\s+(\d+)", sequence)
        assert hit_match is not None, sequence
        first, last = int(hit_match.group(1)), int(hit_match.group(2))

        # The alignment columns start at the first non-space character of the
        # CS line and stop right before its trailing " CS" label.
        left = len(structure) - len(structure.lstrip(" "))
        right = structure.find(" CS")

        structure = structure[left:right]
        sequence = sequence[left:right].upper()

        # remove pairs which did not match to consensus
        if nc_index is not None:
            non_canonical = lines[nc_index][left:right]
            for nc_match in re.finditer(r"[v?]", non_canonical):
                position = nc_match.start()
                structure = structure[:position] + "." + structure[position + 1 :]

        # replace *[n]* placeholders
        while True:
            placeholder = re.search(r"[<*]\[ *(\d+)\][*>]", sequence)

            if placeholder is None:
                break

            start, stop = placeholder.start(), placeholder.end()
            n = int(placeholder.group(1))
            sequence = sequence[:start] + "." * n + sequence[stop:]
            structure = structure[:start] + "." * n + structure[stop:]

        # replace gaps
        while True:
            gap = re.search(r"-+", sequence)

            if gap is None:
                break

            start, stop = gap.start(), gap.end()
            sequence = sequence[:start] + sequence[stop:]
            structure = structure[:start] + structure[stop:]

        assert len(sequence) == len(structure)

        if first > last:
            # Coordinates decrease, so the aligned text is located in the input
            # through its reverse complement and then converted back.
            assert set(sequence) <= set(_COMPLEMENTARY.keys()), (
                set(sequence) - set(_COMPLEMENTARY.keys()),
                sequence,
            )
            sequence_comp = "".join([_COMPLEMENTARY[c] for c in sequence[::-1]])
            found = re.search(sequence_comp, fasta.sequence)
            assert found is not None, (sequence, fasta.sequence)
            sequence_comp = found.group()
            sequence = "".join([_COMPLEMENTARY[c] for c in sequence_comp[::-1]])
        else:
            found = re.search(sequence, fasta.sequence)
            assert found is not None, (sequence, fasta.sequence)
            sequence = found.group()

        assert len(sequence) == len(structure)

        structure = structure.translate(_UNPAIRED_TO_DOT)
        if set(structure) == {"."}:
            continue

        dot_bracket = DotBracket.from_string("N" * len(structure), structure)
        structure = BpSeq.from_dotbracket(dot_bracket).dot_bracket.structure
        result.append([sequence, structure])

        if len(result) >= count:
            break

    return result
216
+
217
+
def generate_consensus_secondary_structure(
    fasta: FASTA, family: str = None, fold: bool = True, count: int = 1
):
    """
    Run cmsearch on one FASTA record and format the resulting structures.

    The record is written to a temporary ".fa" file and searched with the
    covariance model returned by ``ensure_cm(family)``. The hits are parsed by
    ``analyze_cmsearch``. When ``fold`` is true, every structure is replaced by
    the result of ``mfe()`` computed after the parsed structure was added with
    ``hc_add_from_db``.

    Args:
        fasta (FASTA): The record to analyze.
        family (str): Passed to ``ensure_cm`` to select the covariance model.
        fold (bool): Whether to refold each hit with the RNA library.
        count (int): Passed to ``analyze_cmsearch`` as the result limit.

    Returns:
        list: One string per result with three lines: ">" + header, the
        sequence, and the structure.

    Raises:
        RuntimeError: If ``cmpress`` or ``cmsearch`` is not found in PATH.
        subprocess.CalledProcessError: If ``cmsearch`` exits with a non-zero code.
    """
    if any(shutil.which(tool) is None for tool in ("cmpress", "cmsearch")):
        raise RuntimeError(
            "cmpress/cmsearch not found in PATH, please install Infernal first."
        )

    cm_path = ensure_cm(family)

    with tempfile.NamedTemporaryFile(suffix=".fa") as fin:
        fin.write(str(fasta).encode())
        # Seeking pushes the buffered bytes to disk, so cmsearch, which opens
        # the file by name, sees the complete record.
        fin.seek(0)

        command = ["cmsearch", "--notextw", cm_path, fin.name]
        completed = subprocess.run(command, check=True, capture_output=True)

    results = analyze_cmsearch(completed.stdout.decode(), fasta, count)

    if fold:
        for hit in results:
            compound = RNA.fold_compound(hit[0])
            compound.hc_add_from_db(hit[1])
            folded, _ = compound.mfe()
            hit[1] = folded

    formatted = []
    for sequence, structure in results:
        formatted.append(f">{fasta.header}\n{sequence}\n{structure}")
    return formatted
250
+
251
+
def main():
    """
    Command-line entry point.

    Parses the arguments, reads the input as a FASTA file when the positional
    argument is an existing path (otherwise treats the argument itself as a
    sequence with the header "header"), and prints every generated result.
    """
    parser = argparse.ArgumentParser(
        description="Generate consensus secondary structure for a given sequence. IMPORTANT! You need to have Infernal software installed to use this script."
    )
    parser.add_argument(
        "sequence",
        type=str,
        help="an RNA sequence or a path to FASTA file, possibly containing multiple sequences",
    )
    parser.add_argument(
        "--family",
        type=str,
        help="(optional) name of the Rfam family to use, if not given, the whole Rfam will be checked for the given sequence",
    )
    parser.add_argument(
        "--no-fold",
        action="store_true",
        help="(optional) whether to disable folding of the consensus secondary structure by RNAfold with constraints",
    )
    parser.add_argument(
        "--count",
        type=int,
        default=1,
        help="(optional) maximum number of consensus secondary structures to generate per sequence, default is 1",
    )
    args = parser.parse_args()

    # An argument that names an existing path is read as a file; anything
    # else is taken literally as the sequence.
    fastas = (
        parse_fasta(args.sequence)
        if os.path.exists(args.sequence)
        else [FASTA("header", args.sequence)]
    )
    should_fold = not args.no_fold

    for fasta in fastas:
        for result in generate_consensus_secondary_structure(
            fasta, args.family, should_fold, args.count
        ):
            print(result)
291
+
292
+
293
+ if __name__ == "__main__":
294
+ main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes