rdrpcatch 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdrpcatch/cli/args.py +33 -26
- rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py +1 -1
- rdrpcatch/rdrpcatch_wrapper.py +56 -47
- {rdrpcatch-0.0.4.dist-info → rdrpcatch-0.0.6.dist-info}/METADATA +2 -3
- {rdrpcatch-0.0.4.dist-info → rdrpcatch-0.0.6.dist-info}/RECORD +8 -8
- {rdrpcatch-0.0.4.dist-info → rdrpcatch-0.0.6.dist-info}/WHEEL +0 -0
- {rdrpcatch-0.0.4.dist-info → rdrpcatch-0.0.6.dist-info}/entry_points.txt +0 -0
- {rdrpcatch-0.0.4.dist-info → rdrpcatch-0.0.6.dist-info}/licenses/LICENSE +0 -0
rdrpcatch/cli/args.py
CHANGED
|
@@ -115,30 +115,30 @@ def cli():
|
|
|
115
115
|
@click.option('-gen_code', '--gen_code',
|
|
116
116
|
type=click.INT,
|
|
117
117
|
default=1,
|
|
118
|
-
help='Genetic code to use for translation. (default: 1) Possible genetic codes (supported by seqkit translate) :
|
|
119
|
-
'2: The Vertebrate Mitochondrial Code
|
|
120
|
-
'3: The Yeast Mitochondrial Code
|
|
121
|
-
'4: The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code
|
|
122
|
-
'5: The Invertebrate Mitochondrial Code
|
|
123
|
-
'6: The Ciliate, Dasycladacean and Hexamita Nuclear Code
|
|
124
|
-
'9: The Echinoderm and Flatworm Mitochondrial Code
|
|
125
|
-
'10: The Euplotid Nuclear Code
|
|
126
|
-
'11: The Bacterial, Archaeal and Plant Plastid Code
|
|
127
|
-
'12: The Alternative Yeast Nuclear Code
|
|
128
|
-
'13: The Ascidian Mitochondrial Code
|
|
129
|
-
'14: The Alternative Flatworm Mitochondrial Code
|
|
130
|
-
'16: Chlorophycean Mitochondrial Code
|
|
131
|
-
'21: Trematode Mitochondrial Code
|
|
132
|
-
'22: Scenedesmus obliquus Mitochondrial Code
|
|
133
|
-
'23: Thraustochytrium Mitochondrial Code
|
|
134
|
-
'24: Pterobranchia Mitochondrial Code
|
|
135
|
-
'25: Candidate Division SR1 and Gracilibacteria Code
|
|
136
|
-
'26: Pachysolen tannophilus Nuclear Code
|
|
137
|
-
'27: Karyorelict Nuclear
|
|
138
|
-
'28: Condylostoma Nuclear
|
|
139
|
-
'29: Mesodinium Nuclear
|
|
140
|
-
'30: Peritrich Nuclear
|
|
141
|
-
'31: Blastocrithidia Nuclear
|
|
118
|
+
help='Genetic code to use for translation. (default: 1) Possible genetic codes (supported by seqkit translate) : 1: The Standard Code, '
|
|
119
|
+
'2: The Vertebrate Mitochondrial Code, '
|
|
120
|
+
'3: The Yeast Mitochondrial Code, '
|
|
121
|
+
'4: The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code, '
|
|
122
|
+
'5: The Invertebrate Mitochondrial Code, '
|
|
123
|
+
'6: The Ciliate, Dasycladacean and Hexamita Nuclear Code, '
|
|
124
|
+
'9: The Echinoderm and Flatworm Mitochondrial Code, '
|
|
125
|
+
'10: The Euplotid Nuclear Code, '
|
|
126
|
+
'11: The Bacterial, Archaeal and Plant Plastid Code, '
|
|
127
|
+
'12: The Alternative Yeast Nuclear Code, '
|
|
128
|
+
'13: The Ascidian Mitochondrial Code, '
|
|
129
|
+
'14: The Alternative Flatworm Mitochondrial Code, '
|
|
130
|
+
'16: Chlorophycean Mitochondrial Code, '
|
|
131
|
+
'21: Trematode Mitochondrial Code, '
|
|
132
|
+
'22: Scenedesmus obliquus Mitochondrial Code, '
|
|
133
|
+
'23: Thraustochytrium Mitochondrial Code, '
|
|
134
|
+
'24: Pterobranchia Mitochondrial Code, '
|
|
135
|
+
'25: Candidate Division SR1 and Gracilibacteria Code, '
|
|
136
|
+
'26: Pachysolen tannophilus Nuclear Code, '
|
|
137
|
+
'27: Karyorelict Nuclear, '
|
|
138
|
+
'28: Condylostoma Nuclear, '
|
|
139
|
+
'29: Mesodinium Nuclear, '
|
|
140
|
+
'30: Peritrich Nuclear, '
|
|
141
|
+
'31: Blastocrithidia Nuclear, ')
|
|
142
142
|
@click.option('-bundle', '--bundle',
|
|
143
143
|
is_flag=True,
|
|
144
144
|
default=False,
|
|
@@ -147,9 +147,14 @@ def cli():
|
|
|
147
147
|
is_flag=True,
|
|
148
148
|
default=False,
|
|
149
149
|
help="Keep temporary files (Expert users) (default: False)")
|
|
150
|
+
@click.option('-overwrite', '--overwrite',
|
|
151
|
+
is_flag=True,
|
|
152
|
+
default=False,
|
|
153
|
+
help="Force overwrite of existing output directory. (default: False)")
|
|
154
|
+
|
|
150
155
|
@click.pass_context
|
|
151
156
|
def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose, evalue,
|
|
152
|
-
incevalue, domevalue, incdomevalue, zvalue, cpus, length_thr, gen_code, bundle, keep_tmp):
|
|
157
|
+
incevalue, domevalue, incdomevalue, zvalue, cpus, length_thr, gen_code, bundle, keep_tmp, overwrite):
|
|
153
158
|
"""Scan sequences for RdRps."""
|
|
154
159
|
|
|
155
160
|
# Create a rich table for displaying parameters
|
|
@@ -175,6 +180,7 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
|
|
|
175
180
|
table.add_row("Genetic Code", str(gen_code))
|
|
176
181
|
table.add_row("Bundle Output", "ON" if bundle else "OFF")
|
|
177
182
|
table.add_row("Save Temporary Files", "ON" if keep_tmp else "OFF")
|
|
183
|
+
table.add_row("Force Overwrite", "ON" if overwrite else "OFF")
|
|
178
184
|
|
|
179
185
|
console.print(Panel(table, title="Scan Configuration"))
|
|
180
186
|
|
|
@@ -207,7 +213,8 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
|
|
|
207
213
|
length_thr=length_thr,
|
|
208
214
|
gen_code=gen_code,
|
|
209
215
|
bundle=bundle,
|
|
210
|
-
keep_tmp=keep_tmp
|
|
216
|
+
keep_tmp=keep_tmp,
|
|
217
|
+
overwrite=overwrite
|
|
211
218
|
)
|
|
212
219
|
|
|
213
220
|
# @cli.command("download", help="Download RdRpCATCH databases.")
|
rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py
CHANGED
|
@@ -68,7 +68,7 @@ class hmmsearch_formatter:
|
|
|
68
68
|
# Check if the dataframe is empty
|
|
69
69
|
if data_df.is_empty():
|
|
70
70
|
title_line= ['Contig_name', 'Translated_contig_name (frame)', 'Sequence_length(AA)', 'Profile_name',
|
|
71
|
-
'Profile_length', 'E-value', 'score',
|
|
71
|
+
'Profile_length', 'E-value', 'score','norm_bitscore_profile',
|
|
72
72
|
'norm_bitscore_contig', 'ID_score', 'RdRp_from(AA)', 'RdRp_to(AA)', 'profile_coverage',
|
|
73
73
|
'contig_coverage']
|
|
74
74
|
data_df = pl.DataFrame({col: [] for col in title_line})
|
rdrpcatch/rdrpcatch_wrapper.py
CHANGED
|
@@ -53,7 +53,7 @@ def bundle_results(output_dir, prefix):
|
|
|
53
53
|
|
|
54
54
|
return archive_path
|
|
55
55
|
|
|
56
|
-
def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,incdomE,domE,incE,z, cpus, length_thr, gen_code, bundle, keep_tmp):
|
|
56
|
+
def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,incdomE,domE,incE,z, cpus, length_thr, gen_code, bundle, keep_tmp, overwrite):
|
|
57
57
|
"""
|
|
58
58
|
Run RdRpCATCH scan.
|
|
59
59
|
|
|
@@ -110,8 +110,16 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
110
110
|
log_file = outputs.log_file
|
|
111
111
|
if not os.path.exists(outputs.output_dir):
|
|
112
112
|
os.makedirs(outputs.output_dir)
|
|
113
|
+
elif os.path.exists(outputs.output_dir) and overwrite:
|
|
114
|
+
# If the output directory already exists and force_overwrite is True, remove the existing directory
|
|
115
|
+
import shutil
|
|
116
|
+
shutil.rmtree(outputs.output_dir)
|
|
117
|
+
os.makedirs(outputs.output_dir)
|
|
118
|
+
outputs = paths.rdrpcatch_output(prefix, Path(output_dir))
|
|
113
119
|
else:
|
|
114
|
-
raise FileExistsError(f"Output directory already exists: {outputs.output_dir},
|
|
120
|
+
raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory"
|
|
121
|
+
f" or activate the -overwrite flag to overwrite the contents of the directory.")
|
|
122
|
+
|
|
115
123
|
if not os.path.exists(outputs.log_dir):
|
|
116
124
|
os.makedirs(outputs.log_dir)
|
|
117
125
|
|
|
@@ -160,6 +168,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
160
168
|
if seq_type == 'prot':
|
|
161
169
|
utils.fasta_checker(input_file, logger).check_seq_length(100000)
|
|
162
170
|
|
|
171
|
+
logger.loud_log("Fetching HMM databases...")
|
|
172
|
+
|
|
163
173
|
## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot
|
|
164
174
|
rvmt_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("RVMT")
|
|
165
175
|
if verbose:
|
|
@@ -234,10 +244,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
234
244
|
|
|
235
245
|
# Fetch mmseqs database
|
|
236
246
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
logger.silent_log("Fetching mmseqs databases.")
|
|
247
|
+
|
|
248
|
+
logger.loud_log("Fetching Mmseqs2 databases...")
|
|
249
|
+
|
|
241
250
|
mmseqs_db_path = fetch_dbs.db_fetcher(db_dir).fetch_mmseqs_db_path("mmseqs_refseq_riboviria_20250211")
|
|
242
251
|
|
|
243
252
|
if verbose:
|
|
@@ -260,21 +269,17 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
260
269
|
if not os.path.exists(outputs.tmp_dir):
|
|
261
270
|
outputs.tmp_dir.mkdir(parents=True)
|
|
262
271
|
|
|
272
|
+
logger.loud_log("Databases fetched successfully.")
|
|
273
|
+
|
|
263
274
|
if seq_type == 'nuc':
|
|
264
|
-
|
|
265
|
-
logger.loud_log("Nucleotide sequence detected.")
|
|
266
|
-
else:
|
|
267
|
-
logger.silent_log("Nucleotide sequence detected.")
|
|
275
|
+
logger.loud_log("Nucleotide sequence detected.")
|
|
268
276
|
|
|
269
277
|
set_dict = {}
|
|
270
278
|
translated_set_dict = {}
|
|
271
279
|
df_list = []
|
|
272
280
|
|
|
273
281
|
## Filter out sequences with length less than 400 bp with seqkit
|
|
274
|
-
|
|
275
|
-
logger.loud_log("Filtering out sequences with length less than 400 bp.")
|
|
276
|
-
else:
|
|
277
|
-
logger.silent_log("Filtering out sequences with length less than 400 bp.")
|
|
282
|
+
logger.loud_log("Filtering out sequences with length less than 400 bp.")
|
|
278
283
|
|
|
279
284
|
if not os.path.exists(outputs.seqkit_seq_output_dir):
|
|
280
285
|
outputs.seqkit_seq_output_dir.mkdir(parents=True)
|
|
@@ -286,10 +291,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
286
291
|
logger.silent_log(f"Filtered sequence written to: { outputs.seqkit_seq_output_path}")
|
|
287
292
|
|
|
288
293
|
## Translate nucleotide sequences to protein sequences with seqkit
|
|
289
|
-
|
|
290
|
-
logger.loud_log("Translating nucleotide sequences to protein sequences.")
|
|
291
|
-
else:
|
|
292
|
-
logger.silent_log("Translating nucleotide sequences to protein sequences.")
|
|
294
|
+
logger.loud_log("Translating nucleotide sequences to protein sequences.")
|
|
293
295
|
|
|
294
296
|
if not os.path.exists(outputs.seqkit_translate_output_dir):
|
|
295
297
|
outputs.seqkit_translate_output_dir.mkdir(parents=True)
|
|
@@ -302,6 +304,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
302
304
|
logger.silent_log(f"Translated sequence written to: {outputs.seqkit_translate_output_path}")
|
|
303
305
|
|
|
304
306
|
for db_name,db_path in zip(db_name_list, db_path_list):
|
|
307
|
+
logger.loud_log(f"Running HMMsearch for {db_name} database.")
|
|
305
308
|
|
|
306
309
|
if verbose:
|
|
307
310
|
logger.loud_log(f"HMM output path: {outputs.hmm_output_path(db_name)}")
|
|
@@ -353,7 +356,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
353
356
|
])
|
|
354
357
|
df_list.append(df)
|
|
355
358
|
|
|
359
|
+
logger.loud_log(f"HMMsearch for {db_name} completed.")
|
|
356
360
|
|
|
361
|
+
logger.loud_log("HMMsearch completed.")
|
|
357
362
|
|
|
358
363
|
if not os.path.exists(outputs.plot_outdir):
|
|
359
364
|
outputs.plot_outdir.mkdir(parents=True)
|
|
@@ -361,9 +366,10 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
361
366
|
if not os.path.exists(outputs.tsv_outdir):
|
|
362
367
|
outputs.tsv_outdir.mkdir(parents=True)
|
|
363
368
|
|
|
369
|
+
logger.loud_log("Consolidating results.")
|
|
364
370
|
|
|
365
371
|
# Combine all the dataframes in the list
|
|
366
|
-
combined_df = pl.concat(df_list, how='
|
|
372
|
+
combined_df = pl.concat(df_list, how='vertical_relaxed')
|
|
367
373
|
# Write the combined dataframe to a tsv file
|
|
368
374
|
for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
|
|
369
375
|
'ID_score', 'profile_coverage', 'contig_coverage']:
|
|
@@ -379,6 +385,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
379
385
|
logger.loud_log("No hits found by RdRpCATCH. Exiting.")
|
|
380
386
|
return None
|
|
381
387
|
|
|
388
|
+
# Generate upset plot
|
|
389
|
+
logger.loud_log("Generating plots.")
|
|
382
390
|
|
|
383
391
|
if len(db_name_list) > 1:
|
|
384
392
|
if verbose:
|
|
@@ -411,6 +419,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
411
419
|
combined_set = set.union(*[value for value in set_dict.values()])
|
|
412
420
|
translated_combined_set = set.union(*[value for value in translated_set_dict.values()])
|
|
413
421
|
|
|
422
|
+
logger.loud_log("Extracting RdRp contigs from the input file.")
|
|
423
|
+
|
|
414
424
|
# Write a fasta file with all the contigs
|
|
415
425
|
if not os.path.exists(outputs.fasta_output_dir):
|
|
416
426
|
outputs.fasta_output_dir.mkdir(parents=True)
|
|
@@ -430,25 +440,21 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
430
440
|
if verbose:
|
|
431
441
|
logger.loud_log(f"Contigs written to: {outputs.fasta_nuc_out_path}")
|
|
432
442
|
logger.loud_log(f"Translated contigs written to: {outputs.fasta_prot_out_path}")
|
|
443
|
+
logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
433
444
|
else:
|
|
434
445
|
logger.silent_log(f"Contigs written to: {outputs.fasta_nuc_out_path}")
|
|
435
446
|
logger.silent_log(f"Translated contigs written to: {outputs.fasta_prot_out_path}")
|
|
447
|
+
logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
436
448
|
|
|
437
449
|
if not os.path.exists(outputs.mmseqs_tax_output_dir):
|
|
438
450
|
outputs.mmseqs_tax_output_dir.mkdir(parents=True)
|
|
439
451
|
|
|
440
|
-
|
|
441
|
-
logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
442
|
-
else:
|
|
443
|
-
logger.silent_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
452
|
+
logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
444
453
|
|
|
445
454
|
mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_tax_output_prefix,
|
|
446
455
|
outputs.mmseqs_tax_output_dir, 7, cpus, outputs.mmseqs_tax_log_path).run_mmseqs_easy_tax_lca()
|
|
447
456
|
|
|
448
|
-
|
|
449
|
-
logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
450
|
-
else:
|
|
451
|
-
logger.silent_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
457
|
+
logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
452
458
|
|
|
453
459
|
if not os.path.exists(outputs.mmseqs_e_search_output_dir):
|
|
454
460
|
outputs.mmseqs_e_search_output_dir.mkdir(parents=True)
|
|
@@ -460,18 +466,17 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
460
466
|
utils.mmseqs_parser(outputs.mmseqs_tax_output_lca_path, outputs.mmseqs_e_search_output_path).tax_to_rdrpcatch(
|
|
461
467
|
outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
|
|
462
468
|
|
|
469
|
+
logger.loud_log("Taxonomic annotation completed.")
|
|
463
470
|
|
|
464
471
|
elif seq_type == 'prot':
|
|
465
472
|
|
|
466
|
-
|
|
467
|
-
logger.loud_log("Protein sequence detected.")
|
|
468
|
-
else:
|
|
469
|
-
logger.silent_log("Protein sequence detected.")
|
|
473
|
+
logger.loud_log("Protein sequence detected.")
|
|
470
474
|
|
|
471
475
|
set_dict = {}
|
|
472
476
|
df_list = []
|
|
473
477
|
|
|
474
478
|
for db_name,db_path in zip (db_name_list, db_path_list):
|
|
479
|
+
logger.loud_log(f"Running HMMsearch for {db_name} database.")
|
|
475
480
|
|
|
476
481
|
if verbose:
|
|
477
482
|
logger.loud_log(f"HMM output path: {outputs.hmm_output_path(db_name)}")
|
|
@@ -519,16 +524,20 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
519
524
|
])
|
|
520
525
|
df_list.append(df)
|
|
521
526
|
|
|
527
|
+
logger.loud_log(f"HMMsearch for {db_name} completed.")
|
|
528
|
+
|
|
529
|
+
logger.loud_log("HMMsearch completed.")
|
|
530
|
+
|
|
522
531
|
if not os.path.exists(outputs.plot_outdir):
|
|
523
532
|
outputs.plot_outdir.mkdir(parents=True)
|
|
524
533
|
|
|
525
534
|
if not os.path.exists(outputs.tsv_outdir):
|
|
526
535
|
outputs.tsv_outdir.mkdir(parents=True)
|
|
527
536
|
|
|
528
|
-
|
|
537
|
+
logger.loud_log("Consolidating results.")
|
|
529
538
|
|
|
530
539
|
# Combine all the dataframes in the list
|
|
531
|
-
combined_df = pl.concat(df_list, how='
|
|
540
|
+
combined_df = pl.concat(df_list, how='vertical_relaxed')
|
|
532
541
|
# Write the combined dataframe to a tsv file
|
|
533
542
|
for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
|
|
534
543
|
'ID_score', 'profile_coverage', 'contig_coverage']:
|
|
@@ -543,6 +552,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
543
552
|
logger.loud_log("No hits found by RdRpCATCH. Exiting.")
|
|
544
553
|
return None
|
|
545
554
|
|
|
555
|
+
# Generate upset plot
|
|
556
|
+
logger.loud_log("Generating plots.")
|
|
557
|
+
|
|
546
558
|
if len(db_name_list) > 1:
|
|
547
559
|
if verbose:
|
|
548
560
|
logger.loud_log("Generating upset plot.")
|
|
@@ -575,6 +587,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
575
587
|
# Extract all the contigs
|
|
576
588
|
combined_set = set.union(*[value for value in set_dict.values()])
|
|
577
589
|
# Write a fasta file with all the contigs
|
|
590
|
+
|
|
591
|
+
logger.loud_log("Extracting RdRp contigs from the input file.")
|
|
592
|
+
|
|
578
593
|
if not os.path.exists(outputs.fasta_output_dir):
|
|
579
594
|
outputs.fasta_output_dir.mkdir(parents=True)
|
|
580
595
|
|
|
@@ -601,11 +616,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
601
616
|
if not os.path.exists(outputs.mmseqs_tax_output_dir):
|
|
602
617
|
outputs.mmseqs_tax_output_dir.mkdir(parents=True)
|
|
603
618
|
|
|
604
|
-
|
|
605
|
-
logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
606
|
-
else:
|
|
607
|
-
logger.silent_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
608
|
-
|
|
619
|
+
logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
609
620
|
|
|
610
621
|
mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_tax_output_prefix,
|
|
611
622
|
outputs.mmseqs_tax_output_dir, 7, cpus, outputs.mmseqs_tax_log_path).run_mmseqs_easy_tax_lca()
|
|
@@ -613,10 +624,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
613
624
|
if not os.path.exists(outputs.mmseqs_e_search_output_dir):
|
|
614
625
|
outputs.mmseqs_e_search_output_dir.mkdir(parents=True)
|
|
615
626
|
|
|
616
|
-
|
|
617
|
-
logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
618
|
-
else:
|
|
619
|
-
logger.silent_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
627
|
+
logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
620
628
|
|
|
621
629
|
mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_e_search_output_dir,
|
|
622
630
|
outputs.mmseqs_e_search_output_path, 7, cpus, outputs.mmseqs_e_search_log_path).run_mmseqs_e_search()
|
|
@@ -625,11 +633,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
625
633
|
outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
|
|
626
634
|
|
|
627
635
|
|
|
628
|
-
|
|
629
|
-
if verbose:
|
|
630
|
-
logger.loud_log(f"Total Runtime: {end_time}")
|
|
631
|
-
else:
|
|
632
|
-
logger.silent_log(f"Total Runtime: {end_time}")
|
|
636
|
+
|
|
633
637
|
|
|
634
638
|
|
|
635
639
|
|
|
@@ -658,6 +662,11 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
658
662
|
else:
|
|
659
663
|
logger.silent_log(f"Results bundled into: {archive_path}")
|
|
660
664
|
|
|
665
|
+
end_time = logger.stop_timer(start_time, verbose)
|
|
666
|
+
|
|
667
|
+
logger.loud_log(f"Total Runtime: {end_time}")
|
|
668
|
+
|
|
669
|
+
logger.loud_log("RdRpCATCH completed successfully.")
|
|
661
670
|
|
|
662
671
|
|
|
663
672
|
return outputs.extended_rdrpcatch_output
|
{rdrpcatch-0.0.4.dist-info → rdrpcatch-0.0.6.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rdrpcatch
|
|
3
|
-
Version: 0.0.4
|
|
3
|
+
Version: 0.0.6
|
|
4
4
|
Dynamic: Summary
|
|
5
5
|
Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
6
6
|
Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
@@ -81,7 +81,7 @@ The dependencies can be installed using conda or mamba. Follow these steps:
|
|
|
81
81
|
|
|
82
82
|
Create a new conda environment and install the dependencies:
|
|
83
83
|
```bash
|
|
84
|
-
conda create -n rdrpcatch python=3.12
|
|
84
|
+
conda env create -n rdrpcatch python=3.12
|
|
85
85
|
conda activate rdrpcatch
|
|
86
86
|
conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
|
|
87
87
|
```
|
|
@@ -205,7 +205,6 @@ Dimitris Karapliafis (dimitris.karapliafis@wur.nl), potentially via slack/teams
|
|
|
205
205
|
|
|
206
206
|
##TODO:
|
|
207
207
|
- [ ] loud logging is linking to the utils.py file, not the actual line of code causing the error.
|
|
208
|
-
- [ ] Add `overwrite` flag
|
|
209
208
|
- [ ] drop `db_dir` argument and use global/environment/config variable that is set after running the `download` command
|
|
210
209
|
|
|
211
210
|
|
{rdrpcatch-0.0.4.dist-info → rdrpcatch-0.0.6.dist-info}/RECORD
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rdrpcatch/rdrpcatch_wrapper.py,sha256=
|
|
2
|
+
rdrpcatch/rdrpcatch_wrapper.py,sha256=75jXdh9rUbo5ypISmfl1e9kLtk6mxg6ivPwLI3slH-U,31106
|
|
3
3
|
rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
rdrpcatch/cli/args.py,sha256=
|
|
4
|
+
rdrpcatch/cli/args.py,sha256=PGU6SJeHU6B0e-r-PheUpdi5PwkDhBa_ixn7WgNmDRw,15933
|
|
5
5
|
rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
|
|
7
|
-
rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=
|
|
7
|
+
rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=2_ERXFQK2lpVReWl0jwQdnKIObv_zq07uFJOzGsTHlo,25025
|
|
8
8
|
rdrpcatch/rdrpcatch_scripts/gui.py,sha256=he8kx_4VJWB7SVv9XSQPk0DmkOjEFIg-uGMAtDp3t-w,10576
|
|
9
9
|
rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py,sha256=bwzuCxu8nHQ5OC0Yr5Lyvhcyk9OWjuamInqe0T0lc38,3809
|
|
10
10
|
rdrpcatch/rdrpcatch_scripts/paths.py,sha256=roTZ2QPF4Fii7jtHkS9I6INJg1Vu78Dc_ieQGKjOCP4,4710
|
|
@@ -12,8 +12,8 @@ rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LY
|
|
|
12
12
|
rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
|
|
13
13
|
rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
|
|
14
14
|
rdrpcatch/rdrpcatch_scripts/utils.py,sha256=jvpyPxchAMn6BeLV7HOFECSY_a3nbkxDBBL8tunmM8A,16938
|
|
15
|
-
rdrpcatch-0.0.
|
|
16
|
-
rdrpcatch-0.0.
|
|
17
|
-
rdrpcatch-0.0.
|
|
18
|
-
rdrpcatch-0.0.
|
|
19
|
-
rdrpcatch-0.0.
|
|
15
|
+
rdrpcatch-0.0.6.dist-info/METADATA,sha256=8HANk0HZEF3A1S9rB2wt7LG3SsI8ULMrcfIfaxWSvtc,13977
|
|
16
|
+
rdrpcatch-0.0.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
17
|
+
rdrpcatch-0.0.6.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
|
|
18
|
+
rdrpcatch-0.0.6.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
|
|
19
|
+
rdrpcatch-0.0.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|