rdrpcatch 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rdrpcatch/cli/args.py CHANGED
@@ -26,7 +26,7 @@ def parse_comma_separated_options(ctx, param, value):
26
26
  return ['all']
27
27
 
28
28
  allowed_choices = ['RVMT', 'NeoRdRp', 'NeoRdRp.2.1', 'TSA_Olendraite_fam', 'TSA_Olendraite_gen', 'RDRP-scan',
29
- 'Lucaprot', 'all']
29
+ 'Lucaprot_HMM, Zayed_HMM', 'all']
30
30
  lower_choices = [choice.lower() for choice in allowed_choices]
31
31
  options = value.split(',')
32
32
  lower_options = [option.lower() for option in options]
@@ -73,7 +73,7 @@ def cli():
73
73
  callback=parse_comma_separated_options,
74
74
  default="all",
75
75
  help="Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1,"
76
- " TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,Lucaprot, all")
76
+ " TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,Lucaprot_HMM, Zayed_HMM, all")
77
77
  @click.option("--custom-dbs",
78
78
  help="Path to directory containing custom MSAs/pHMM files to use as additional databases",
79
79
  type=click.Path(exists=True, path_type=Path))
@@ -170,7 +170,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
170
170
 
171
171
  logger.loud_log("Fetching HMM databases...")
172
172
 
173
- ## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot
173
+ ## Fetch HMM databases: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot_HMM, Zayed_HMM
174
174
  rvmt_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("RVMT")
175
175
  if verbose:
176
176
  logger.loud_log(f"RVMT HMM database fetched from: {rvmt_hmm_db}")
@@ -202,19 +202,24 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
202
202
  logger.loud_log(f"RDRP-scan HMM database fetched from: {rdrpscan_hmm_db}")
203
203
  else:
204
204
  logger.silent_log(f"RDRP-scan HMM database fetched from: {rdrpscan_hmm_db}")
205
- lucaprot_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Lucaprot")
205
+ lucaprot_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Lucaprot_HMM")
206
206
  if verbose:
207
207
  logger.loud_log(f"Lucaprot HMM database fetched from: {lucaprot_hmm_db}")
208
208
  else:
209
209
  logger.silent_log(f"Lucaprot HMM database fetched from: {lucaprot_hmm_db}")
210
+ zayed_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Zayed_HMM")
211
+ if verbose:
212
+ logger.loud_log(f"Zayed HMM database fetched from: {zayed_hmm_db}")
213
+ else:
214
+ logger.silent_log(f"Zayed HMM database fetched from: {zayed_hmm_db}")
210
215
 
211
216
  db_name_list = []
212
217
  db_path_list = []
213
218
 
214
219
  ## Set up HMM databases
215
220
  if db_options == ['all']:
216
- db_name_list = ["RVMT", "NeoRdRp", "NeoRdRp.2.1", "TSA_Olendraite_fam","TSA_Olendraite_gen", "RDRP-scan", "Lucaprot"]
217
- db_path_list = [rvmt_hmm_db, neordrp_hmm_db, neordrp_2_hmm_db, tsa_olen_fam_hmm_db,tsa_olen_gen_hmm_db, rdrpscan_hmm_db, lucaprot_hmm_db]
221
+ db_name_list = ["RVMT", "NeoRdRp", "NeoRdRp.2.1", "TSA_Olendraite_fam","TSA_Olendraite_gen", "RDRP-scan", "Lucaprot_HMM", "Zayed_HMM"]
222
+ db_path_list = [rvmt_hmm_db, neordrp_hmm_db, neordrp_2_hmm_db, tsa_olen_fam_hmm_db,tsa_olen_gen_hmm_db, rdrpscan_hmm_db, lucaprot_hmm_db, zayed_hmm_db]
218
223
 
219
224
  else:
220
225
  for db in db_options:
@@ -236,9 +241,12 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
236
241
  elif db == "RDRP-scan".lower():
237
242
  db_name_list.append("RDRP-scan")
238
243
  db_path_list.append(rdrpscan_hmm_db)
239
- elif db == "Lucaprot".lower():
240
- db_name_list.append("Lucaprot")
244
+ elif db == "Lucaprot_HMM".lower():
245
+ db_name_list.append("Lucaprot_HMM")
241
246
  db_path_list.append(lucaprot_hmm_db)
247
+ elif db == "Zayed_HMM".lower():
248
+ db_name_list.append("Zayed_HMM")
249
+ db_path_list.append(zayed_hmm_db)
242
250
  else:
243
251
  raise Exception(f"Invalid database option: {db}")
244
252
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdrpcatch
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Dynamic: Summary
5
5
  Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
6
6
  Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
@@ -36,7 +36,7 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
36
36
 
37
37
  ** The tool has been modified to use [rolypoly](https://code.jgi.doe.gov/UNeri/rolypoly) code/approaches **
38
38
 
39
- ![rdrpcatch_flowchart_v0.png](images%2Frdrpcatch_flowchart_v0.png)
39
+ ![rdrpcatch_illustration.png](images/rdrpcatch_illustration.png)
40
40
 
41
41
  ### Supported databases
42
42
  - NeoRdRp <sup>1</sup> : 1182 pHMMs
@@ -133,25 +133,25 @@ Command to download pre-compiled databases from Zenodo. If the databases are alr
133
133
  ### rdrpcatch scan:
134
134
  Search a given input using selected RdRp databases.
135
135
 
136
- | Argument | Short Flag | Type | Description |
137
- |----------|------------|------|-------------|
138
- | `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
139
- | `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
140
- | `--db_dir` | `-db_dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
141
- | `--db_options` | `-dbs` | TEXT | Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot, all |
142
- | `--custom-dbs` | | PATH | Path to directory containing custom MSAs/pHMM files to use as additional databases |
143
- | `--seq_type` | `-seq_type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
144
- | `--verbose` | `-v` | FLAG | Print verbose output. |
145
- | `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
146
- | `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
147
- | `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
148
- | `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
149
- | `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
150
- | `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
151
- | `--length_thr` | `-length_thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
152
- | `--gen_code` | `-gen_code` | INTEGER | Genetic code to use for translation. (default: 1) |
153
- | `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
154
- | `--keep_tmp` | `-keep_tmp` | | Keep the temporary files generated during the analysis. (default: False) |
136
+ | Argument | Short Flag | Type | Description |
137
+ |----------|------------|------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
138
+ | `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
139
+ | `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
140
+ | `--db_dir` | `-db_dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
141
+ | `--db_options` | `-dbs` | TEXT | Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot_HMM, Zayed_HMM, all |
142
+ | `--custom-dbs` | | PATH | Path to directory containing custom MSAs/pHMM files to use as additional databases |
143
+ | `--seq_type` | `-seq_type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
144
+ | `--verbose` | `-v` | FLAG | Print verbose output. |
145
+ | `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
146
+ | `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
147
+ | `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
148
+ | `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
149
+ | `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
150
+ | `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
151
+ | `--length_thr` | `-length_thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
152
+ | `--gen_code` | `-gen_code` | INTEGER | Genetic code to use for translation. (default: 1) |
153
+ | `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
154
+ | `--keep_tmp` | `-keep_tmp` | | Keep the temporary files generated during the analysis. (default: False) |
155
155
 
156
156
 
157
157
 
@@ -1,7 +1,7 @@
1
1
  rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- rdrpcatch/rdrpcatch_wrapper.py,sha256=75jXdh9rUbo5ypISmfl1e9kLtk6mxg6ivPwLI3slH-U,31106
2
+ rdrpcatch/rdrpcatch_wrapper.py,sha256=X-U0CKQWHwybLIdWvaFZGEj-v0oTUnBv2PbiLAdu8s4,31573
3
3
  rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- rdrpcatch/cli/args.py,sha256=PGU6SJeHU6B0e-r-PheUpdi5PwkDhBa_ixn7WgNmDRw,15933
4
+ rdrpcatch/cli/args.py,sha256=DX7gfESWi4j1CNpALAEG45JV_b5KkU1LAJj2FDb8J5g,15963
5
5
  rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
7
7
  rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=2_ERXFQK2lpVReWl0jwQdnKIObv_zq07uFJOzGsTHlo,25025
@@ -12,8 +12,8 @@ rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LY
12
12
  rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
13
13
  rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
14
14
  rdrpcatch/rdrpcatch_scripts/utils.py,sha256=jvpyPxchAMn6BeLV7HOFECSY_a3nbkxDBBL8tunmM8A,16938
15
- rdrpcatch-0.0.6.dist-info/METADATA,sha256=8HANk0HZEF3A1S9rB2wt7LG3SsI8ULMrcfIfaxWSvtc,13977
16
- rdrpcatch-0.0.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- rdrpcatch-0.0.6.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
18
- rdrpcatch-0.0.6.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
19
- rdrpcatch-0.0.6.dist-info/RECORD,,
15
+ rdrpcatch-0.0.7.dist-info/METADATA,sha256=BU-V7TAZcYQC5L3KuX_N_iH_l7Q77go7ZF9-1jYRrQE,16219
16
+ rdrpcatch-0.0.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ rdrpcatch-0.0.7.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
18
+ rdrpcatch-0.0.7.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
19
+ rdrpcatch-0.0.7.dist-info/RECORD,,