PyPI - rdrpcatch - Versions diffs - 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl - Mend

rdrpcatch 0.0.5py3-none-any.whl → 0.0.7py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

rdrpcatch/cli/args.py CHANGED Viewed

@@ -26,7 +26,7 @@ def parse_comma_separated_options(ctx, param, value):
         return ['all']
     allowed_choices = ['RVMT', 'NeoRdRp', 'NeoRdRp.2.1', 'TSA_Olendraite_fam', 'TSA_Olendraite_gen', 'RDRP-scan',
-                       'Lucaprot', 'all']
+                       'Lucaprot_HMM, Zayed_HMM', 'all']
     lower_choices = [choice.lower() for choice in allowed_choices]
     options = value.split(',')
     lower_options = [option.lower() for option in options]
@@ -73,7 +73,7 @@ def cli():
               callback=parse_comma_separated_options,
               default="all",
               help="Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1,"
-                   " TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,Lucaprot, all")
+                   " TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,Lucaprot_HMM, Zayed_HMM, all")
 @click.option("--custom-dbs",
               help="Path to directory containing custom MSAs/pHMM files to use as additional databases",
               type=click.Path(exists=True, path_type=Path))
@@ -115,30 +115,30 @@ def cli():
 @click.option('-gen_code', '--gen_code',
               type=click.INT,
               default=1,
-              help='Genetic code to use for translation. (default: 1) Possible genetic codes (supported by seqkit translate) :      1: The Standard Code       \n'
-                     '2: The Vertebrate Mitochondrial Code      \n'
-                     '3: The Yeast Mitochondrial Code       \n'
-                     '4: The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code        \n'
-                     '5: The Invertebrate Mitochondrial Code        \n'
-                     '6: The Ciliate, Dasycladacean and Hexamita Nuclear Code       \n'
-                     '9: The Echinoderm and Flatworm Mitochondrial Code     \n'
-                    '10: The Euplotid Nuclear Code      \n'
-                    '11: The Bacterial, Archaeal and Plant Plastid Code     \n'
-                    '12: The Alternative Yeast Nuclear Code     \n'
-                    '13: The Ascidian Mitochondrial Code        \n'
-                    '14: The Alternative Flatworm Mitochondrial Code        \n'
-                    '16: Chlorophycean Mitochondrial Code       \n'
-                    '21: Trematode Mitochondrial Code       \n'
-                    '22: Scenedesmus obliquus Mitochondrial Code        \n'
-                    '23: Thraustochytrium Mitochondrial Code        \n'
-                    '24: Pterobranchia Mitochondrial Code       \n'
-                    '25: Candidate Division SR1 and Gracilibacteria Code        \n'
-                    '26: Pachysolen tannophilus Nuclear Code        \n'
-                    '27: Karyorelict Nuclear        \n'
-                    '28: Condylostoma Nuclear       \n'
-                    '29: Mesodinium Nuclear     \n'
-                    '30: Peritrich Nuclear      \n'
-                    '31: Blastocrithidia Nuclear        \n')
+              help='Genetic code to use for translation. (default: 1) Possible genetic codes (supported by seqkit translate) : 1: The Standard Code, '
+                     '2: The Vertebrate Mitochondrial Code, '
+                     '3: The Yeast Mitochondrial Code, '
+                     '4: The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code, '
+                     '5: The Invertebrate Mitochondrial Code, '
+                     '6: The Ciliate, Dasycladacean and Hexamita Nuclear Code, '
+                     '9: The Echinoderm and Flatworm Mitochondrial Code, '
+                    '10: The Euplotid Nuclear Code, '
+                    '11: The Bacterial, Archaeal and Plant Plastid Code, '
+                    '12: The Alternative Yeast Nuclear Code, '
+                    '13: The Ascidian Mitochondrial Code, '
+                    '14: The Alternative Flatworm Mitochondrial Code, '
+                    '16: Chlorophycean Mitochondrial Code, '
+                    '21: Trematode Mitochondrial Code, '
+                    '22: Scenedesmus obliquus Mitochondrial Code, '
+                    '23: Thraustochytrium Mitochondrial Code, '
+                    '24: Pterobranchia Mitochondrial Code, '
+                    '25: Candidate Division SR1 and Gracilibacteria Code, '
+                    '26: Pachysolen tannophilus Nuclear Code, '
+                    '27: Karyorelict Nuclear, '
+                    '28: Condylostoma Nuclear, '
+                    '29: Mesodinium Nuclear, '
+                    '30: Peritrich Nuclear, '
+                    '31: Blastocrithidia Nuclear, ')
 @click.option('-bundle', '--bundle',
               is_flag=True,
               default=False,
@@ -147,9 +147,14 @@ def cli():
               is_flag=True,
               default=False,
               help="Keep temporary files (Expert users) (default: False)")
+@click.option('-overwrite', '--overwrite',
+              is_flag=True,
+              default=False,
+              help="Force overwrite of existing output directory. (default: False)")
 @click.pass_context
 def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose, evalue,
-         incevalue, domevalue, incdomevalue, zvalue, cpus, length_thr, gen_code, bundle, keep_tmp):
+         incevalue, domevalue, incdomevalue, zvalue, cpus, length_thr, gen_code, bundle, keep_tmp, overwrite):
     """Scan sequences for RdRps."""
     # Create a rich table for displaying parameters
@@ -175,6 +180,7 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
     table.add_row("Genetic Code", str(gen_code))
     table.add_row("Bundle Output", "ON" if bundle else "OFF")
     table.add_row("Save Temporary Files", "ON" if keep_tmp else "OFF")
+    table.add_row("Force Overwrite", "ON" if overwrite else "OFF")
     console.print(Panel(table, title="Scan Configuration"))
@@ -207,7 +213,8 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
         length_thr=length_thr,
         gen_code=gen_code,
         bundle=bundle,
-        keep_tmp=keep_tmp
+        keep_tmp=keep_tmp,
+        overwrite=overwrite
     )
 # @cli.command("download", help="Download RdRpCATCH databases.")

rdrpcatch/rdrpcatch_wrapper.py CHANGED Viewed

@@ -53,7 +53,7 @@ def bundle_results(output_dir, prefix):
     return archive_path
-def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,incdomE,domE,incE,z, cpus, length_thr, gen_code, bundle, keep_tmp):
+def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,incdomE,domE,incE,z, cpus, length_thr, gen_code, bundle, keep_tmp, overwrite):
     """
     Run RdRpCATCH scan.
@@ -110,8 +110,16 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
     log_file = outputs.log_file
     if not os.path.exists(outputs.output_dir):
         os.makedirs(outputs.output_dir)
+    elif os.path.exists(outputs.output_dir) and overwrite:
+        # If the output directory already exists and force_overwrite is True, remove the existing directory
+        import shutil
+        shutil.rmtree(outputs.output_dir)
+        os.makedirs(outputs.output_dir)
+        outputs = paths.rdrpcatch_output(prefix, Path(output_dir))
     else:
-        raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory.")
+        raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory"
+                              f" or activate the -overwrite flag to overwrite the contents of the directory.")
     if not os.path.exists(outputs.log_dir):
         os.makedirs(outputs.log_dir)
@@ -160,7 +168,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
     if seq_type == 'prot':
         utils.fasta_checker(input_file, logger).check_seq_length(100000)
-    ## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot
+    logger.loud_log("Fetching HMM databases...")
+    ## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot_HMM,Zayed_HMM
     rvmt_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("RVMT")
     if verbose:
         logger.loud_log(f"RVMT HMM database fetched from: {rvmt_hmm_db}")
@@ -192,19 +202,24 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         logger.loud_log(f"RDRP-scan HMM database fetched from: {rdrpscan_hmm_db}")
     else:
         logger.silent_log(f"RDRP-scan HMM database fetched from: {rdrpscan_hmm_db}")
-    lucaprot_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Lucaprot")
+    lucaprot_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Lucaprot_HMM")
     if verbose:
         logger.loud_log(f"Lucaprot HMM database fetched from: {lucaprot_hmm_db}")
     else:
         logger.silent_log(f"Lucaprot HMM database fetched from: {lucaprot_hmm_db}")
+    zayed_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Zayed_HMM")
+    if verbose:
+        logger.loud_log(f"Zayed HMM database fetched from: {zayed_hmm_db}")
+    else:
+        logger.silent_log(f"Zayed HMM database fetched from: {zayed_hmm_db}")
     db_name_list = []
     db_path_list = []
     ## Set up HMM databases
     if db_options == ['all']:
-        db_name_list = ["RVMT", "NeoRdRp", "NeoRdRp.2.1", "TSA_Olendraite_fam","TSA_Olendraite_gen", "RDRP-scan", "Lucaprot"]
-        db_path_list = [rvmt_hmm_db, neordrp_hmm_db, neordrp_2_hmm_db, tsa_olen_fam_hmm_db,tsa_olen_gen_hmm_db, rdrpscan_hmm_db, lucaprot_hmm_db]
+        db_name_list = ["RVMT", "NeoRdRp", "NeoRdRp.2.1", "TSA_Olendraite_fam","TSA_Olendraite_gen", "RDRP-scan", "Lucaprot_HMM", "Zayed_HMM"]
+        db_path_list = [rvmt_hmm_db, neordrp_hmm_db, neordrp_2_hmm_db, tsa_olen_fam_hmm_db,tsa_olen_gen_hmm_db, rdrpscan_hmm_db, lucaprot_hmm_db, zayed_hmm_db]
     else:
         for db in db_options:
@@ -226,18 +241,20 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             elif db == "RDRP-scan".lower():
                 db_name_list.append("RDRP-scan")
                 db_path_list.append(rdrpscan_hmm_db)
-            elif db == "Lucaprot".lower():
-                db_name_list.append("Lucaprot")
+            elif db == "Lucaprot_HMM".lower():
+                db_name_list.append("Lucaprot_HMM")
                 db_path_list.append(lucaprot_hmm_db)
+            elif db == "Zayed_HMM".lower():
+                db_name_list.append("Zayed_HMM")
+                db_path_list.append(zayed_hmm_db)
             else:
                 raise Exception(f"Invalid database option: {db}")
     # Fetch mmseqs database
-    if verbose:
-        logger.loud_log("Fetching mmseqs databases.")
-    else:
-        logger.silent_log("Fetching mmseqs databases.")
+    logger.loud_log("Fetching Mmseqs2 databases...")
     mmseqs_db_path = fetch_dbs.db_fetcher(db_dir).fetch_mmseqs_db_path("mmseqs_refseq_riboviria_20250211")
     if verbose:
@@ -260,21 +277,17 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
     if not os.path.exists(outputs.tmp_dir):
         outputs.tmp_dir.mkdir(parents=True)
+    logger.loud_log("Databases fetched successfully.")
     if seq_type == 'nuc':
-        if verbose:
-            logger.loud_log("Nucleotide sequence detected.")
-        else:
-            logger.silent_log("Nucleotide sequence detected.")
+        logger.loud_log("Nucleotide sequence detected.")
         set_dict = {}
         translated_set_dict = {}
         df_list = []
         ## Filter out sequences with length less than 400 bp with seqkit
-        if verbose:
-            logger.loud_log("Filtering out sequences with length less than 400 bp.")
-        else:
-            logger.silent_log("Filtering out sequences with length less than 400 bp.")
+        logger.loud_log("Filtering out sequences with length less than 400 bp.")
         if not os.path.exists(outputs.seqkit_seq_output_dir):
             outputs.seqkit_seq_output_dir.mkdir(parents=True)
@@ -286,10 +299,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             logger.silent_log(f"Filtered sequence written to: { outputs.seqkit_seq_output_path}")
         ## Translate nucleotide sequences to protein sequences with seqkit
-        if verbose:
-            logger.loud_log("Translating nucleotide sequences to protein sequences.")
-        else:
-            logger.silent_log("Translating nucleotide sequences to protein sequences.")
+        logger.loud_log("Translating nucleotide sequences to protein sequences.")
         if not os.path.exists(outputs.seqkit_translate_output_dir):
             outputs.seqkit_translate_output_dir.mkdir(parents=True)
@@ -302,6 +312,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             logger.silent_log(f"Translated sequence written to: {outputs.seqkit_translate_output_path}")
         for db_name,db_path in zip(db_name_list, db_path_list):
+            logger.loud_log(f"Running HMMsearch for {db_name} database.")
             if verbose:
                 logger.loud_log(f"HMM output path: {outputs.hmm_output_path(db_name)}")
@@ -353,7 +364,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             ])
             df_list.append(df)
+            logger.loud_log(f"HMMsearch for {db_name} completed.")
+        logger.loud_log("HMMsearch completed.")
         if not os.path.exists(outputs.plot_outdir):
             outputs.plot_outdir.mkdir(parents=True)
@@ -361,6 +374,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         if not os.path.exists(outputs.tsv_outdir):
             outputs.tsv_outdir.mkdir(parents=True)
+        logger.loud_log("Consolidating results.")
         # Combine all the dataframes in the list
         combined_df = pl.concat(df_list, how='vertical_relaxed')
@@ -379,6 +393,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             logger.loud_log("No hits found by RdRpCATCH. Exiting.")
             return None
+        # Generate upset plot
+        logger.loud_log("Generating plots.")
         if len(db_name_list) > 1:
             if verbose:
@@ -411,6 +427,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         combined_set = set.union(*[value for value in set_dict.values()])
         translated_combined_set = set.union(*[value for value in translated_set_dict.values()])
+        logger.loud_log("Extracting RdRp contigs from the input file.")
         # Write a fasta file with all the contigs
         if not os.path.exists(outputs.fasta_output_dir):
             outputs.fasta_output_dir.mkdir(parents=True)
@@ -430,25 +448,21 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         if verbose:
             logger.loud_log(f"Contigs written to: {outputs.fasta_nuc_out_path}")
             logger.loud_log(f"Translated contigs written to: {outputs.fasta_prot_out_path}")
+            logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
         else:
             logger.silent_log(f"Contigs written to: {outputs.fasta_nuc_out_path}")
             logger.silent_log(f"Translated contigs written to: {outputs.fasta_prot_out_path}")
+            logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
         if not os.path.exists(outputs.mmseqs_tax_output_dir):
             outputs.mmseqs_tax_output_dir.mkdir(parents=True)
-        if verbose:
-            logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
-        else:
-            logger.silent_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
+        logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
         mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_tax_output_prefix,
                           outputs.mmseqs_tax_output_dir, 7, cpus, outputs.mmseqs_tax_log_path).run_mmseqs_easy_tax_lca()
-        if verbose:
-            logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
-        else:
-            logger.silent_log("Running mmseqs easy-search for taxonomic annotation.")
+        logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
         if not os.path.exists(outputs.mmseqs_e_search_output_dir):
             outputs.mmseqs_e_search_output_dir.mkdir(parents=True)
@@ -460,18 +474,17 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         utils.mmseqs_parser(outputs.mmseqs_tax_output_lca_path, outputs.mmseqs_e_search_output_path).tax_to_rdrpcatch(
             outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
+        logger.loud_log("Taxonomic annotation completed.")
     elif seq_type == 'prot':
-        if verbose:
-            logger.loud_log("Protein sequence detected.")
-        else:
-            logger.silent_log("Protein sequence detected.")
+        logger.loud_log("Protein sequence detected.")
         set_dict = {}
         df_list = []
         for db_name,db_path in zip (db_name_list, db_path_list):
+            logger.loud_log(f"Running HMMsearch for {db_name} database.")
             if verbose:
                 logger.loud_log(f"HMM output path: {outputs.hmm_output_path(db_name)}")
@@ -519,12 +532,17 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             ])
             df_list.append(df)
+            logger.loud_log(f"HMMsearch for {db_name} completed.")
+        logger.loud_log("HMMsearch completed.")
         if not os.path.exists(outputs.plot_outdir):
             outputs.plot_outdir.mkdir(parents=True)
         if not os.path.exists(outputs.tsv_outdir):
             outputs.tsv_outdir.mkdir(parents=True)
+        logger.loud_log("Consolidating results.")
         # Combine all the dataframes in the list
         combined_df = pl.concat(df_list, how='vertical_relaxed')
@@ -542,6 +560,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             logger.loud_log("No hits found by RdRpCATCH. Exiting.")
             return None
+        # Generate upset plot
+        logger.loud_log("Generating plots.")
         if len(db_name_list) > 1:
             if verbose:
                 logger.loud_log("Generating upset plot.")
@@ -574,6 +595,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         # Extract all the contigs
         combined_set = set.union(*[value for value in set_dict.values()])
         # Write a fasta file with all the contigs
+        logger.loud_log("Extracting RdRp contigs from the input file.")
         if not os.path.exists(outputs.fasta_output_dir):
             outputs.fasta_output_dir.mkdir(parents=True)
@@ -600,11 +624,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         if not os.path.exists(outputs.mmseqs_tax_output_dir):
             outputs.mmseqs_tax_output_dir.mkdir(parents=True)
-        if verbose:
-            logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
-        else:
-            logger.silent_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
+        logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
         mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_tax_output_prefix,
                           outputs.mmseqs_tax_output_dir, 7, cpus, outputs.mmseqs_tax_log_path).run_mmseqs_easy_tax_lca()
@@ -612,10 +632,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         if not os.path.exists(outputs.mmseqs_e_search_output_dir):
             outputs.mmseqs_e_search_output_dir.mkdir(parents=True)
-        if verbose:
-            logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
-        else:
-            logger.silent_log("Running mmseqs easy-search for taxonomic annotation.")
+        logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
         mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_e_search_output_dir,
                           outputs.mmseqs_e_search_output_path, 7, cpus, outputs.mmseqs_e_search_log_path).run_mmseqs_e_search()
@@ -624,11 +641,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
-    end_time = logger.stop_timer(start_time, verbose)
-    if verbose:
-        logger.loud_log(f"Total Runtime: {end_time}")
-    else:
-        logger.silent_log(f"Total Runtime: {end_time}")
@@ -657,6 +670,11 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         else:
             logger.silent_log(f"Results bundled into: {archive_path}")
+    end_time = logger.stop_timer(start_time, verbose)
+    logger.loud_log(f"Total Runtime: {end_time}")
+    logger.loud_log("RdRpCATCH completed successfully.")
     return outputs.extended_rdrpcatch_output

{rdrpcatch-0.0.5.dist-info → rdrpcatch-0.0.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rdrpcatch
-Version: 0.0.5
+Version: 0.0.7
 Dynamic: Summary
 Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
 Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
@@ -36,7 +36,7 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
 ** The tool has been modified to use [rolypoly](https://code.jgi.doe.gov/UNeri/rolypoly) code/approaches **
-![rdrpcatch_flowchart_v0.png](images%2Frdrpcatch_flowchart_v0.png)
+![rdrpcatch_illustration.png](images%2Frdrpcatch_illustration.png)
 ### Supported databases
 - NeoRdRp <sup>1</sup> : 1182 pHMMs
@@ -133,25 +133,25 @@ Command to download pre-compiled databases from Zenodo. If the databases are alr
 ### rdrpcatch scan:
 Search a given input using selected RdRp databases.
-| Argument | Short Flag | Type | Description |
-|----------|------------|------|-------------|
-| `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
-| `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
-| `--db_dir` | `-db_dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
-| `--db_options` | `-dbs` | TEXT | Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot, all |
-| `--custom-dbs` | | PATH | Path to directory containing custom MSAs/pHMM files to use as additional databases |
-| `--seq_type` | `-seq_type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
-| `--verbose` | `-v` | FLAG | Print verbose output. |
-| `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
-| `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
-| `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
-| `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
-| `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
-| `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
-| `--length_thr` | `-length_thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
-| `--gen_code` | `-gen_code` | INTEGER | Genetic code to use for translation. (default: 1) |
-| `--bundle` | `-bundle` |  |  Bundle the output files into a single archive. (default: False) |
-| `--keep_tmp` | `-keep_tmp` |  | Keep the temporary files generated during the analysis. (default: False) |
+| Argument | Short Flag | Type | Description                                                                                                                                                                    |
+|----------|------------|------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `--input` | `-i` | FILE | Path to the input FASTA file. [required]                                                                                                                                       |
+| `--output` | `-o` | DIRECTORY | Path to the output directory. [required]                                                                                                                                       |
+| `--db_dir` | `-db_dir` | PATH | Path to the directory containing RdRpCATCH databases. [required]                                                                                                               |
+| `--db_options` | `-dbs` | TEXT | Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot_HMM, Zayed_HMM, all |
+| `--custom-dbs` | | PATH | Path to directory containing custom MSAs/pHMM files to use as additional databases                                                                                             |
+| `--seq_type` | `-seq_type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown                                                                                                                |
+| `--verbose` | `-v` | FLAG | Print verbose output.                                                                                                                                                          |
+| `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5)                                                                                                                               |
+| `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5)                                                                                                                     |
+| `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5)                                                                                                                        |
+| `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5)                                                                                                              |
+| `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000)                                                                                                                      |
+| `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1)                                                                                                                              |
+| `--length_thr` | `-length_thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400)                                                                                                                        |
+| `--gen_code` | `-gen_code` | INTEGER | Genetic code to use for translation. (default: 1)                                                                                                                              |
+| `--bundle` | `-bundle` |  | Bundle the output files into a single archive. (default: False)                                                                                                                |
+| `--keep_tmp` | `-keep_tmp` |  | Keep the temporary files generated during the analysis. (default: False)                                                                                                       |
@@ -205,7 +205,6 @@ Dimitris Karapliafis (dimitris.karapliafis@wur.nl), potentially via slack/teams
 ## TODO:
 - [ ] loud logging is linking to the utils.py file, not the actual line of code causing the error.
-- [ ] Add `overwrite` flag
 - [ ] drop `db_dir` argument and use global/environment/config variable that is set after running the `download` command

{rdrpcatch-0.0.5.dist-info → rdrpcatch-0.0.7.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rdrpcatch/rdrpcatch_wrapper.py,sha256=bZ5w4NuTlCSUsCx9baEtJSk7jGiyp-6XthO80IKaMXI,30564
+rdrpcatch/rdrpcatch_wrapper.py,sha256=X-U0CKQWHwybLIdWvaFZGEj-v0oTUnBv2PbiLAdu8s4,31573
 rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rdrpcatch/cli/args.py,sha256=2E2gXY42hNasUP94HmPxpgVCA1glk_oN7D5ftbu6W2c,15805
+rdrpcatch/cli/args.py,sha256=DX7gfESWi4j1CNpALAEG45JV_b5KkU1LAJj2FDb8J5g,15963
 rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
 rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=2_ERXFQK2lpVReWl0jwQdnKIObv_zq07uFJOzGsTHlo,25025
@@ -12,8 +12,8 @@ rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LY
 rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
 rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
 rdrpcatch/rdrpcatch_scripts/utils.py,sha256=jvpyPxchAMn6BeLV7HOFECSY_a3nbkxDBBL8tunmM8A,16938
-rdrpcatch-0.0.5.dist-info/METADATA,sha256=X3wolDh_nUrk7caPG4jFMvsF7FHZCvYuGzjPLZnC4VA,14004
-rdrpcatch-0.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-rdrpcatch-0.0.5.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
-rdrpcatch-0.0.5.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
-rdrpcatch-0.0.5.dist-info/RECORD,,
+rdrpcatch-0.0.7.dist-info/METADATA,sha256=BU-V7TAZcYQC5L3KuX_N_iH_l7Q77go7ZF9-1jYRrQE,16219
+rdrpcatch-0.0.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+rdrpcatch-0.0.7.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
+rdrpcatch-0.0.7.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
+rdrpcatch-0.0.7.dist-info/RECORD,,

{rdrpcatch-0.0.5.dist-info → rdrpcatch-0.0.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{rdrpcatch-0.0.5.dist-info → rdrpcatch-0.0.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rdrpcatch-0.0.5.dist-info → rdrpcatch-0.0.7.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

rdrpcatch 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

rdrpcatch 0.0.5py3-none-any.whl → 0.0.7py3-none-any.whl