rdrpcatch 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdrpcatch/cli/args.py +2 -2
- rdrpcatch/rdrpcatch_wrapper.py +14 -6
- {rdrpcatch-0.0.6.dist-info → rdrpcatch-0.0.7.dist-info}/METADATA +21 -21
- {rdrpcatch-0.0.6.dist-info → rdrpcatch-0.0.7.dist-info}/RECORD +7 -7
- {rdrpcatch-0.0.6.dist-info → rdrpcatch-0.0.7.dist-info}/WHEEL +0 -0
- {rdrpcatch-0.0.6.dist-info → rdrpcatch-0.0.7.dist-info}/entry_points.txt +0 -0
- {rdrpcatch-0.0.6.dist-info → rdrpcatch-0.0.7.dist-info}/licenses/LICENSE +0 -0
rdrpcatch/cli/args.py
CHANGED
|
@@ -26,7 +26,7 @@ def parse_comma_separated_options(ctx, param, value):
|
|
|
26
26
|
return ['all']
|
|
27
27
|
|
|
28
28
|
allowed_choices = ['RVMT', 'NeoRdRp', 'NeoRdRp.2.1', 'TSA_Olendraite_fam', 'TSA_Olendraite_gen', 'RDRP-scan',
|
|
29
|
-
'
|
|
29
|
+
'Lucaprot_HMM, Zayed_HMM', 'all']
|
|
30
30
|
lower_choices = [choice.lower() for choice in allowed_choices]
|
|
31
31
|
options = value.split(',')
|
|
32
32
|
lower_options = [option.lower() for option in options]
|
|
@@ -73,7 +73,7 @@ def cli():
|
|
|
73
73
|
callback=parse_comma_separated_options,
|
|
74
74
|
default="all",
|
|
75
75
|
help="Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1,"
|
|
76
|
-
" TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,
|
|
76
|
+
" TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,Lucaprot_HMM, Zayed_HMM, all")
|
|
77
77
|
@click.option("--custom-dbs",
|
|
78
78
|
help="Path to directory containing custom MSAs/pHMM files to use as additional databases",
|
|
79
79
|
type=click.Path(exists=True, path_type=Path))
|
rdrpcatch/rdrpcatch_wrapper.py
CHANGED
|
@@ -170,7 +170,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
170
170
|
|
|
171
171
|
logger.loud_log("Fetching HMM databases...")
|
|
172
172
|
|
|
173
|
-
## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan,
|
|
173
|
+
## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot_HMM,Zayed_HMM
|
|
174
174
|
rvmt_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("RVMT")
|
|
175
175
|
if verbose:
|
|
176
176
|
logger.loud_log(f"RVMT HMM database fetched from: {rvmt_hmm_db}")
|
|
@@ -202,19 +202,24 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
202
202
|
logger.loud_log(f"RDRP-scan HMM database fetched from: {rdrpscan_hmm_db}")
|
|
203
203
|
else:
|
|
204
204
|
logger.silent_log(f"RDRP-scan HMM database fetched from: {rdrpscan_hmm_db}")
|
|
205
|
-
lucaprot_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("
|
|
205
|
+
lucaprot_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Lucaprot_HMM")
|
|
206
206
|
if verbose:
|
|
207
207
|
logger.loud_log(f"Lucaprot HMM database fetched from: {lucaprot_hmm_db}")
|
|
208
208
|
else:
|
|
209
209
|
logger.silent_log(f"Lucaprot HMM database fetched from: {lucaprot_hmm_db}")
|
|
210
|
+
zayed_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Zayed_HMM")
|
|
211
|
+
if verbose:
|
|
212
|
+
logger.loud_log(f"Zayed HMM database fetched from: {zayed_hmm_db}")
|
|
213
|
+
else:
|
|
214
|
+
logger.silent_log(f"Zayed HMM database fetched from: {zayed_hmm_db}")
|
|
210
215
|
|
|
211
216
|
db_name_list = []
|
|
212
217
|
db_path_list = []
|
|
213
218
|
|
|
214
219
|
## Set up HMM databases
|
|
215
220
|
if db_options == ['all']:
|
|
216
|
-
db_name_list = ["RVMT", "NeoRdRp", "NeoRdRp.2.1", "TSA_Olendraite_fam","TSA_Olendraite_gen", "RDRP-scan", "
|
|
217
|
-
db_path_list = [rvmt_hmm_db, neordrp_hmm_db, neordrp_2_hmm_db, tsa_olen_fam_hmm_db,tsa_olen_gen_hmm_db, rdrpscan_hmm_db, lucaprot_hmm_db]
|
|
221
|
+
db_name_list = ["RVMT", "NeoRdRp", "NeoRdRp.2.1", "TSA_Olendraite_fam","TSA_Olendraite_gen", "RDRP-scan", "Lucaprot_HMM", "Zayed_HMM"]
|
|
222
|
+
db_path_list = [rvmt_hmm_db, neordrp_hmm_db, neordrp_2_hmm_db, tsa_olen_fam_hmm_db,tsa_olen_gen_hmm_db, rdrpscan_hmm_db, lucaprot_hmm_db, zayed_hmm_db]
|
|
218
223
|
|
|
219
224
|
else:
|
|
220
225
|
for db in db_options:
|
|
@@ -236,9 +241,12 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
236
241
|
elif db == "RDRP-scan".lower():
|
|
237
242
|
db_name_list.append("RDRP-scan")
|
|
238
243
|
db_path_list.append(rdrpscan_hmm_db)
|
|
239
|
-
elif db == "
|
|
240
|
-
db_name_list.append("
|
|
244
|
+
elif db == "Lucaprot_HMM".lower():
|
|
245
|
+
db_name_list.append("Lucaprot_HMM")
|
|
241
246
|
db_path_list.append(lucaprot_hmm_db)
|
|
247
|
+
elif db == "Zayed_HMM".lower():
|
|
248
|
+
db_name_list.append("Zayed_HMM")
|
|
249
|
+
db_path_list.append(zayed_hmm_db)
|
|
242
250
|
else:
|
|
243
251
|
raise Exception(f"Invalid database option: {db}")
|
|
244
252
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rdrpcatch
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.7
|
|
4
4
|
Dynamic: Summary
|
|
5
5
|
Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
6
6
|
Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
@@ -36,7 +36,7 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
|
|
|
36
36
|
|
|
37
37
|
** The tool has been modified to use [rolypoly](https://code.jgi.doe.gov/UNeri/rolypoly) code/approaches **
|
|
38
38
|
|
|
39
|
-

|
|
40
40
|
|
|
41
41
|
### Supported databases
|
|
42
42
|
- NeoRdRp <sup>1</sup> : 1182 pHMMs
|
|
@@ -133,25 +133,25 @@ Command to download pre-compiled databases from Zenodo. If the databases are alr
|
|
|
133
133
|
### rdrpcatch scan:
|
|
134
134
|
Search a given input using selected RdRp databases.
|
|
135
135
|
|
|
136
|
-
| Argument | Short Flag | Type | Description
|
|
137
|
-
|
|
138
|
-
| `--input` | `-i` | FILE | Path to the input FASTA file. [required]
|
|
139
|
-
| `--output` | `-o` | DIRECTORY | Path to the output directory. [required]
|
|
140
|
-
| `--db_dir` | `-db_dir` | PATH | Path to the directory containing RdRpCATCH databases. [required]
|
|
141
|
-
| `--db_options` | `-dbs` | TEXT | Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,
|
|
142
|
-
| `--custom-dbs` | | PATH | Path to directory containing custom MSAs/pHMM files to use as additional databases
|
|
143
|
-
| `--seq_type` | `-seq_type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown
|
|
144
|
-
| `--verbose` | `-v` | FLAG | Print verbose output.
|
|
145
|
-
| `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5)
|
|
146
|
-
| `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5)
|
|
147
|
-
| `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5)
|
|
148
|
-
| `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5)
|
|
149
|
-
| `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000)
|
|
150
|
-
| `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1)
|
|
151
|
-
| `--length_thr` | `-length_thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400)
|
|
152
|
-
| `--gen_code` | `-gen_code` | INTEGER | Genetic code to use for translation. (default: 1)
|
|
153
|
-
| `--bundle` | `-bundle` | |
|
|
154
|
-
| `--keep_tmp` | `-keep_tmp` | | Keep the temporary files generated during the analysis. (default: False)
|
|
136
|
+
| Argument | Short Flag | Type | Description |
|
|
137
|
+
|----------|------------|------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|
138
|
+
| `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
|
|
139
|
+
| `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
|
|
140
|
+
| `--db_dir` | `-db_dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
|
|
141
|
+
| `--db_options` | `-dbs` | TEXT | Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot_HMM, Zayed_HMM, all |
|
|
142
|
+
| `--custom-dbs` | | PATH | Path to directory containing custom MSAs/pHMM files to use as additional databases |
|
|
143
|
+
| `--seq_type` | `-seq_type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
|
|
144
|
+
| `--verbose` | `-v` | FLAG | Print verbose output. |
|
|
145
|
+
| `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
|
|
146
|
+
| `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
|
|
147
|
+
| `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
|
|
148
|
+
| `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
|
|
149
|
+
| `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
|
|
150
|
+
| `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
|
|
151
|
+
| `--length_thr` | `-length_thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
|
|
152
|
+
| `--gen_code` | `-gen_code` | INTEGER | Genetic code to use for translation. (default: 1) |
|
|
153
|
+
| `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
|
|
154
|
+
| `--keep_tmp` | `-keep_tmp` | | Keep the temporary files generated during the analysis. (default: False) |
|
|
155
155
|
|
|
156
156
|
|
|
157
157
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rdrpcatch/rdrpcatch_wrapper.py,sha256=
|
|
2
|
+
rdrpcatch/rdrpcatch_wrapper.py,sha256=X-U0CKQWHwybLIdWvaFZGEj-v0oTUnBv2PbiLAdu8s4,31573
|
|
3
3
|
rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
rdrpcatch/cli/args.py,sha256=
|
|
4
|
+
rdrpcatch/cli/args.py,sha256=DX7gfESWi4j1CNpALAEG45JV_b5KkU1LAJj2FDb8J5g,15963
|
|
5
5
|
rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
|
|
7
7
|
rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=2_ERXFQK2lpVReWl0jwQdnKIObv_zq07uFJOzGsTHlo,25025
|
|
@@ -12,8 +12,8 @@ rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LY
|
|
|
12
12
|
rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
|
|
13
13
|
rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
|
|
14
14
|
rdrpcatch/rdrpcatch_scripts/utils.py,sha256=jvpyPxchAMn6BeLV7HOFECSY_a3nbkxDBBL8tunmM8A,16938
|
|
15
|
-
rdrpcatch-0.0.
|
|
16
|
-
rdrpcatch-0.0.
|
|
17
|
-
rdrpcatch-0.0.
|
|
18
|
-
rdrpcatch-0.0.
|
|
19
|
-
rdrpcatch-0.0.
|
|
15
|
+
rdrpcatch-0.0.7.dist-info/METADATA,sha256=BU-V7TAZcYQC5L3KuX_N_iH_l7Q77go7ZF9-1jYRrQE,16219
|
|
16
|
+
rdrpcatch-0.0.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
17
|
+
rdrpcatch-0.0.7.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
|
|
18
|
+
rdrpcatch-0.0.7.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
|
|
19
|
+
rdrpcatch-0.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|