rdrpcatch 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -491,11 +491,10 @@ class hmmsearch_output_writter:
491
491
  :rtype: list
492
492
  """
493
493
  # Read the coordinates directly from the rdrpcatch output TSV (rdrpcatch_out)
494
- combined_file = str(Path(rdrpcatch_out).parent / Path(rdrpcatch_out))
495
494
  if self.logger:
496
- self.logger.silent_log(f"Reading coordinates from {combined_file}")
495
+ self.logger.silent_log(f"Reading coordinates from {rdrpcatch_out}")
497
496
 
498
- df = pl.read_csv(combined_file, separator='\t')
497
+ df = pl.read_csv(rdrpcatch_out, separator='\t')
499
498
  if self.logger:
500
499
  self.logger.silent_log(f"Found {len(df)} rows in combined file")
501
500
  self.logger.silent_log(f"Column names: {df.columns}")
@@ -139,7 +139,7 @@ class rdrpcatch_output:
139
139
  return self.fasta_output_dir / f"{self.prefix}_full_aminoacid_contigs.fasta"
140
140
 
141
141
  @property
142
- def rdrpcatch_output(self):
142
+ def rdrpcatch_output_tsv(self):
143
143
  return self.tsv_outdir / f"{self.prefix}_rdrpcatch_output.tsv"
144
144
 
145
145
  @property
@@ -423,8 +423,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
423
423
  if not os.path.exists(outputs.gff_output_dir):
424
424
  outputs.gff_output_dir.mkdir(parents=True)
425
425
  hmm_writer = format_pyhmmer_out.hmmsearch_output_writter(logger)
426
- hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output, outputs.gff_output_path)
427
- rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output,seq_type)
426
+ hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output_tsv, outputs.gff_output_path)
427
+ rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output_tsv,seq_type)
428
428
  utils.fasta(outputs.seqkit_translate_output_path, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
429
429
 
430
430
  if verbose:
@@ -589,8 +589,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
589
589
  outputs.gff_output_dir.mkdir(parents=True)
590
590
 
591
591
  hmm_writer = format_pyhmmer_out.hmmsearch_output_writter(logger)
592
- hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output, outputs.gff_output_path)
593
- rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output,seq_type)
592
+ hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output_tsv, outputs.gff_output_path)
593
+ rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output_tsv,seq_type)
594
594
  utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
595
595
 
596
596
  if verbose:
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdrpcatch
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Dynamic: Summary
5
5
  Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
6
6
  Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
7
7
  Author-email: Dimitris Karapliafis <dimitris.karapliafis@wur.nl>, Uri Neri <uneri@lbl.gov>, RdRpCATCH contributors <dimitris.karapliafis@wur.nl>
8
8
  License: MIT
9
- License-File: LICENCE
9
+ License-File: LICENSE
10
10
  Requires-Python: >=3.12
11
11
  Requires-Dist: altair==5.5.0
12
12
  Requires-Dist: matplotlib==3.10.1
@@ -38,7 +38,7 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
38
38
 
39
39
  ![rdrpcatch_flowchart_v0.png](images%2Frdrpcatch_flowchart_v0.png)
40
40
 
41
- Supported databases
41
+ ### Supported databases
42
42
  - NeoRdRp <sup>1</sup> : 1182 pHMMs
43
43
  - NeoRdRp2 <sup>2</sup>: 19394 pHMMs
44
44
  - RVMT <sup>3</sup>: 710 pHMMs
@@ -58,11 +58,6 @@ Supported databases
58
58
 
59
59
  ## Installation
60
60
 
61
- ### Installation instructions for testing phase
62
-
63
- RdRpCATCH will be available as a bioconda package soon. For the testing phase, we provide a tarball and a .yaml file to
64
- install the tool and its dependencies. The .tar.bz2 is created for Linux systems but should work on MacOS as well.
65
- (Windows is not supported)
66
61
 
67
62
  #### Prerequisites
68
63
  For the installation process, conda is required. If you don't have conda installed, you can find instructions on how to
@@ -72,10 +67,7 @@ Mamba is a faster alternative to conda. If you have it installed, you can use it
72
67
  #### Installation steps
73
68
 
74
69
  The package is available as a bioconda package. You can install it using the following command:
75
- ```bash
76
- conda install -c bioconda rdrpcatch
77
- ```
78
- or
70
+
79
71
  ```bash
80
72
  conda create -n rdrpcatch -c bioconda rdrpcatch
81
73
  ```
@@ -91,7 +83,7 @@ Create a new conda environment and install the dependencies:
91
83
  ```bash
92
84
  conda create -n rdrpcatch python=3.12
93
85
  conda activate rdrpcatch
94
- conda install -c bioconda mmseqs2==17.17.b804f seqkit==2.10.0
86
+ conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
95
87
  ```
96
88
  Install the tool from pip:
97
89
  ```bash
@@ -106,7 +98,8 @@ rdrpcatch download --destination_dir path/to/store/databases
106
98
  ```
107
99
 
108
100
  * Note 1: The databases are large files and may take some time to download (~ 3 GB).
109
- * Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
101
+ * Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
102
+ * Note 3: If you encounter an SSL error while downloading, please try again. This error appeared sporadically during testing, and simply restarting the download seems to resolve it.
110
103
 
111
104
  ## Usage
112
105
  RdRpCATCH can be used as a CLI tool as follows:
@@ -171,7 +164,7 @@ rdrpcatch scan will create a folder with the following structure:
171
164
  | `{prefix}_rdrpcatch_fasta` | A directory containing the sequences that were identified as RdRp sequences. |
172
165
  | `{prefix}_rdrpcatch_plots` | A directory containing the plots generated during the analysis. |
173
166
  | `{prefix}_gff_files` | A directory containing the GFF files generated during the analysis. (For now only based on protein sequences) |
174
- | `tmp` | A directory containing temporary files generated during the analysis. (Only available if the -keep_tmp flag is used |
167
+ | `tmp` | A directory containing temporary files generated during the analysis. (Only available if the -keep_tmp flag is used) |
175
168
 
176
169
  #### Output table fields
177
170
  A summary of the results is stored in the `{prefix}_rdrpcatch_output_annotated.tsv` file, which contains the following fields:
@@ -202,10 +195,10 @@ A summary of the results is stored in the `{prefix}_rdrpcatch_output_annotated.t
202
195
  ## Citations
203
196
  The manuscript is still in preparation. If you use RdRpCATCH, please cite this GitHub repository.
204
197
  A precompiled version of the used databases is available at Zenodo DOI: [10.5281/zenodo.14358348](https://doi.org/10.5281/zenodo.14358348).
205
- If you use RdRpCATCH, please cite the following third party databases:
198
+ If you use RdRpCATCH, please also cite the [underlying third-party databases](#supported-databases).
206
199
 
207
200
  ## Acknowledgements
208
- RdRpCATCH is a collaborative effort and we would like to thank all the authors and developers of the underling databases.
201
+ RdRpCATCH is a collaborative effort and we would like to thank all the authors and developers of the underlying databases.
209
202
 
210
203
  ## Contact
211
204
  Dimitris Karapliafis (dimitris.karapliafis@wur.nl), potentially via slack/teams or an issue in the main repo.
@@ -220,4 +213,4 @@ Dimitris Karapliafis (dimitris.karapliafis@wur.nl), potentially via slack/teams
220
213
  TBD up to Dimitris and Anne
221
214
 
222
215
  ## License
223
- [MIT](LICENCE)
216
+ [MIT](LICENSE)
@@ -1,19 +1,19 @@
1
1
  rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- rdrpcatch/rdrpcatch_wrapper.py,sha256=PLj8KSJ2wbXVKlGhCaQEhGgoFBOMXBKQS9DnukHOgAs,30501
2
+ rdrpcatch/rdrpcatch_wrapper.py,sha256=HrC2IqxLYFTroJwH1gDcU1QQU0gzs4YfBDzIN64Zc0A,30517
3
3
  rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  rdrpcatch/cli/args.py,sha256=2E2gXY42hNasUP94HmPxpgVCA1glk_oN7D5ftbu6W2c,15805
5
5
  rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
7
- rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=w4I_7W-fvuT4JmKvZmbJ07Dewm3CBQuQmpMvQutdOqo,25112
7
+ rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=uah2XPrGNkkeptCv_WWBz_qTn5AtDTfVm6XjbCmNO00,25033
8
8
  rdrpcatch/rdrpcatch_scripts/gui.py,sha256=he8kx_4VJWB7SVv9XSQPk0DmkOjEFIg-uGMAtDp3t-w,10576
9
9
  rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py,sha256=bwzuCxu8nHQ5OC0Yr5Lyvhcyk9OWjuamInqe0T0lc38,3809
10
- rdrpcatch/rdrpcatch_scripts/paths.py,sha256=Nq08P8GGPKPrzX6u4wQ2Xwn-kQP-pue_yOGMuRjrLdY,4706
10
+ rdrpcatch/rdrpcatch_scripts/paths.py,sha256=roTZ2QPF4Fii7jtHkS9I6INJg1Vu78Dc_ieQGKjOCP4,4710
11
11
  rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LYWqwcJI,5938
12
12
  rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
13
13
  rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
14
14
  rdrpcatch/rdrpcatch_scripts/utils.py,sha256=Wx1GXhAPBfJw7x67sOu7WclZzMo0N3O-hxNYTVxc3v4,16780
15
- rdrpcatch-0.0.1.dist-info/METADATA,sha256=LMx68xrBacLt8cml_tHk6F-7_Uvr3KOHmhyZOD38joA,14131
16
- rdrpcatch-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- rdrpcatch-0.0.1.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
18
- rdrpcatch-0.0.1.dist-info/licenses/LICENCE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
19
- rdrpcatch-0.0.1.dist-info/RECORD,,
15
+ rdrpcatch-0.0.2.dist-info/METADATA,sha256=JtS1E35XYcWSNu98shFxcpirpz021Ja4pxjjx0Izyz0,14000
16
+ rdrpcatch-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ rdrpcatch-0.0.2.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
18
+ rdrpcatch-0.0.2.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
19
+ rdrpcatch-0.0.2.dist-info/RECORD,,