rdrpcatch 0.0.1.post1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -491,11 +491,10 @@ class hmmsearch_output_writter:
491
491
  :rtype: list
492
492
  """
493
493
  # Convert the path to use combined.tsv instead of rdrpcatch_output.tsv
494
- combined_file = str(Path(rdrpcatch_out).parent / Path(rdrpcatch_out))
495
494
  if self.logger:
496
- self.logger.silent_log(f"Reading coordinates from {combined_file}")
495
+ self.logger.silent_log(f"Reading coordinates from {rdrpcatch_out}")
497
496
 
498
- df = pl.read_csv(combined_file, separator='\t')
497
+ df = pl.read_csv(rdrpcatch_out, separator='\t')
499
498
  if self.logger:
500
499
  self.logger.silent_log(f"Found {len(df)} rows in combined file")
501
500
  self.logger.silent_log(f"Column names: {df.columns}")
@@ -139,7 +139,7 @@ class rdrpcatch_output:
139
139
  return self.fasta_output_dir / f"{self.prefix}_full_aminoacid_contigs.fasta"
140
140
 
141
141
  @property
142
- def rdrpcatch_output(self):
142
+ def rdrpcatch_output_tsv(self):
143
143
  return self.tsv_outdir / f"{self.prefix}_rdrpcatch_output.tsv"
144
144
 
145
145
  @property
@@ -423,8 +423,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
423
423
  if not os.path.exists(outputs.gff_output_dir):
424
424
  outputs.gff_output_dir.mkdir(parents=True)
425
425
  hmm_writer = format_pyhmmer_out.hmmsearch_output_writter(logger)
426
- hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output, outputs.gff_output_path)
427
- rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output,seq_type)
426
+ hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output_tsv, outputs.gff_output_path)
427
+ rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output_tsv,seq_type)
428
428
  utils.fasta(outputs.seqkit_translate_output_path, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
429
429
 
430
430
  if verbose:
@@ -458,7 +458,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
458
458
  outputs.mmseqs_e_search_output_path, 7, cpus, outputs.mmseqs_e_search_log_path).run_mmseqs_e_search()
459
459
 
460
460
  utils.mmseqs_parser(outputs.mmseqs_tax_output_lca_path, outputs.mmseqs_e_search_output_path).tax_to_rdrpcatch(
461
- outputs.rdrpcatch_output, outputs.extended_rdrpcatch_output, seq_type)
461
+ outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
462
462
 
463
463
 
464
464
  elif seq_type == 'prot':
@@ -589,8 +589,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
589
589
  outputs.gff_output_dir.mkdir(parents=True)
590
590
 
591
591
  hmm_writer = format_pyhmmer_out.hmmsearch_output_writter(logger)
592
- hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output, outputs.gff_output_path)
593
- rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output,seq_type)
592
+ hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output_tsv, outputs.gff_output_path)
593
+ rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output_tsv,seq_type)
594
594
  utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
595
595
 
596
596
  if verbose:
@@ -622,7 +622,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
622
622
  outputs.mmseqs_e_search_output_path, 7, cpus, outputs.mmseqs_e_search_log_path).run_mmseqs_e_search()
623
623
 
624
624
  utils.mmseqs_parser(outputs.mmseqs_tax_output_lca_path, outputs.mmseqs_e_search_output_path).tax_to_rdrpcatch(
625
- outputs.rdrpcatch_output, outputs.extended_rdrpcatch_output, seq_type)
625
+ outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
626
626
 
627
627
 
628
628
  end_time = logger.stop_timer(start_time, verbose)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdrpcatch
3
- Version: 0.0.1.post1
3
+ Version: 0.0.3
4
4
  Dynamic: Summary
5
5
  Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
6
6
  Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
@@ -38,7 +38,7 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
38
38
 
39
39
  ![rdrpcatch_flowchart_v0.png](images%2Frdrpcatch_flowchart_v0.png)
40
40
 
41
- Supported databases
41
+ ### Supported databases
42
42
  - NeoRdRp <sup>1</sup> : 1182 pHMMs
43
43
  - NeoRdRp2 <sup>2</sup>: 19394 pHMMs
44
44
  - RVMT <sup>3</sup>: 710 pHMMs
@@ -83,7 +83,7 @@ Create a new conda environment and install the dependencies:
83
83
  ```bash
84
84
  conda create -n rdrpcatch python=3.12
85
85
  conda activate rdrpcatch
86
- conda install -c bioconda mmseqs2==17.17.b804f seqkit==2.10.0
86
+ conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
87
87
  ```
88
88
  Install the tool from pip:
89
89
  ```bash
@@ -98,7 +98,8 @@ rdrpcatch download --destination_dir path/to/store/databases
98
98
  ```
99
99
 
100
100
  * Note 1: The databases are large files and may take some time to download (~ 3 GB).
101
- * Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
101
+ * Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
102
+ * Note 3: If you encounter an SSL error while downloading, please try again. The error appeared sporadically during testing, and simply restarting the download usually resolves it.
102
103
 
103
104
  ## Usage
104
105
  RdRpCATCH can be used as a CLI tool as follows:
@@ -163,7 +164,7 @@ rdrpcatch scan will create a folder with the following structure:
163
164
  | `{prefix}_rdrpcatch_fasta` | A directory containing the sequences that were identified as RdRp sequences. |
164
165
  | `{prefix}_rdrpcatch_plots` | A directory containing the plots generated during the analysis. |
165
166
  | `{prefix}_gff_files` | A directory containing the GFF files generated during the analysis. (For now only based on protein sequences) |
166
- | `tmp` | A directory containing temporary files generated during the analysis. (Only available if the -keep_tmp flag is used |
167
+ | `tmp` | A directory containing temporary files generated during the analysis. (Only available if the -keep_tmp flag is used) |
167
168
 
168
169
  #### Output table fields
169
170
  A summary of the results is stored in the `{prefix}_rdrpcatch_output_annotated.tsv` file, which contains the following fields:
@@ -194,10 +195,10 @@ A summary of the results is stored in the `{prefix}_rdrpcatch_output_annotated.t
194
195
  ## Citations
195
196
  The manuscript is still in preparation. If you use RdRpCATCH, please cite this GitHub repository.
196
197
  A precompiled version of the used databases is available at Zenodo DOI: [10.5281/zenodo.14358348](https://doi.org/10.5281/zenodo.14358348).
197
- If you use RdRpCATCH, please cite the following third party databases:
198
+ If you use RdRpCATCH, please also cite the [underlying third-party databases](#supported-databases).
198
199
 
199
200
  ## Acknowledgements
200
- RdRpCATCH is a collaborative effort and we would like to thank all the authors and developers of the underling databases.
201
+ RdRpCATCH is a collaborative effort and we would like to thank all the authors and developers of the underlying databases.
201
202
 
202
203
  ## Contact
203
204
  Dimitris Karapliafis (dimitris.karapliafis@wur.nl), potentially via slack/teams or an issue in the main repo.
@@ -1,19 +1,19 @@
1
1
  rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- rdrpcatch/rdrpcatch_wrapper.py,sha256=PLj8KSJ2wbXVKlGhCaQEhGgoFBOMXBKQS9DnukHOgAs,30501
2
+ rdrpcatch/rdrpcatch_wrapper.py,sha256=skWDoNCTEKc_7eA6HjBIGe8jk-J1xnzU-zyOzCiA_jo,30525
3
3
  rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  rdrpcatch/cli/args.py,sha256=2E2gXY42hNasUP94HmPxpgVCA1glk_oN7D5ftbu6W2c,15805
5
5
  rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
7
- rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=w4I_7W-fvuT4JmKvZmbJ07Dewm3CBQuQmpMvQutdOqo,25112
7
+ rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=uah2XPrGNkkeptCv_WWBz_qTn5AtDTfVm6XjbCmNO00,25033
8
8
  rdrpcatch/rdrpcatch_scripts/gui.py,sha256=he8kx_4VJWB7SVv9XSQPk0DmkOjEFIg-uGMAtDp3t-w,10576
9
9
  rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py,sha256=bwzuCxu8nHQ5OC0Yr5Lyvhcyk9OWjuamInqe0T0lc38,3809
10
- rdrpcatch/rdrpcatch_scripts/paths.py,sha256=Nq08P8GGPKPrzX6u4wQ2Xwn-kQP-pue_yOGMuRjrLdY,4706
10
+ rdrpcatch/rdrpcatch_scripts/paths.py,sha256=roTZ2QPF4Fii7jtHkS9I6INJg1Vu78Dc_ieQGKjOCP4,4710
11
11
  rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LYWqwcJI,5938
12
12
  rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
13
13
  rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
14
14
  rdrpcatch/rdrpcatch_scripts/utils.py,sha256=Wx1GXhAPBfJw7x67sOu7WclZzMo0N3O-hxNYTVxc3v4,16780
15
- rdrpcatch-0.0.1.post1.dist-info/METADATA,sha256=u56mpzcXGlFbZN3Gxx7fmF6KhgT6usc9HGM0xDi2N9Q,13774
16
- rdrpcatch-0.0.1.post1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- rdrpcatch-0.0.1.post1.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
18
- rdrpcatch-0.0.1.post1.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
19
- rdrpcatch-0.0.1.post1.dist-info/RECORD,,
15
+ rdrpcatch-0.0.3.dist-info/METADATA,sha256=8tUKJfUQb2uEdha9EQuhI1OyEjXnWD4byUM6lzFBlZE,14000
16
+ rdrpcatch-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ rdrpcatch-0.0.3.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
18
+ rdrpcatch-0.0.3.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
19
+ rdrpcatch-0.0.3.dist-info/RECORD,,