rdrpcatch 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py +2 -3
- rdrpcatch/rdrpcatch_scripts/paths.py +1 -1
- rdrpcatch/rdrpcatch_wrapper.py +4 -4
- {rdrpcatch-0.0.1.post1.dist-info → rdrpcatch-0.0.2.dist-info}/METADATA +8 -7
- {rdrpcatch-0.0.1.post1.dist-info → rdrpcatch-0.0.2.dist-info}/RECORD +8 -8
- {rdrpcatch-0.0.1.post1.dist-info → rdrpcatch-0.0.2.dist-info}/WHEEL +0 -0
- {rdrpcatch-0.0.1.post1.dist-info → rdrpcatch-0.0.2.dist-info}/entry_points.txt +0 -0
- {rdrpcatch-0.0.1.post1.dist-info → rdrpcatch-0.0.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -491,11 +491,10 @@ class hmmsearch_output_writter:
|
|
|
491
491
|
:rtype: list
|
|
492
492
|
"""
|
|
493
493
|
# Convert the path to use combined.tsv instead of rdrpcatch_output.tsv
|
|
494
|
-
combined_file = str(Path(rdrpcatch_out).parent / Path(rdrpcatch_out))
|
|
495
494
|
if self.logger:
|
|
496
|
-
self.logger.silent_log(f"Reading coordinates from {
|
|
495
|
+
self.logger.silent_log(f"Reading coordinates from {rdrpcatch_out}")
|
|
497
496
|
|
|
498
|
-
df = pl.read_csv(
|
|
497
|
+
df = pl.read_csv(rdrpcatch_out, separator='\t')
|
|
499
498
|
if self.logger:
|
|
500
499
|
self.logger.silent_log(f"Found {len(df)} rows in combined file")
|
|
501
500
|
self.logger.silent_log(f"Column names: {df.columns}")
|
|
@@ -139,7 +139,7 @@ class rdrpcatch_output:
|
|
|
139
139
|
return self.fasta_output_dir / f"{self.prefix}_full_aminoacid_contigs.fasta"
|
|
140
140
|
|
|
141
141
|
@property
|
|
142
|
-
def
|
|
142
|
+
def rdrpcatch_output_tsv(self):
|
|
143
143
|
return self.tsv_outdir / f"{self.prefix}_rdrpcatch_output.tsv"
|
|
144
144
|
|
|
145
145
|
@property
|
rdrpcatch/rdrpcatch_wrapper.py
CHANGED
|
@@ -423,8 +423,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
423
423
|
if not os.path.exists(outputs.gff_output_dir):
|
|
424
424
|
outputs.gff_output_dir.mkdir(parents=True)
|
|
425
425
|
hmm_writer = format_pyhmmer_out.hmmsearch_output_writter(logger)
|
|
426
|
-
hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.
|
|
427
|
-
rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.
|
|
426
|
+
hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output_tsv, outputs.gff_output_path)
|
|
427
|
+
rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output_tsv,seq_type)
|
|
428
428
|
utils.fasta(outputs.seqkit_translate_output_path, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
|
|
429
429
|
|
|
430
430
|
if verbose:
|
|
@@ -589,8 +589,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
589
589
|
outputs.gff_output_dir.mkdir(parents=True)
|
|
590
590
|
|
|
591
591
|
hmm_writer = format_pyhmmer_out.hmmsearch_output_writter(logger)
|
|
592
|
-
hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.
|
|
593
|
-
rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.
|
|
592
|
+
hmm_writer.write_hmmsearch_hits(outputs.combined_tsv_path, seq_type, outputs.rdrpcatch_output_tsv, outputs.gff_output_path)
|
|
593
|
+
rdrp_coords_list = hmm_writer.get_rdrp_coords(outputs.rdrpcatch_output_tsv,seq_type)
|
|
594
594
|
utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
|
|
595
595
|
|
|
596
596
|
if verbose:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rdrpcatch
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.2
|
|
4
4
|
Dynamic: Summary
|
|
5
5
|
Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
6
6
|
Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
@@ -38,7 +38,7 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
|
|
|
38
38
|
|
|
39
39
|

|
|
40
40
|
|
|
41
|
-
Supported databases
|
|
41
|
+
### Supported databases
|
|
42
42
|
- NeoRdRp <sup>1</sup> : 1182 pHMMs
|
|
43
43
|
- NeoRdRp2 <sup>2</sup>: 19394 pHMMs
|
|
44
44
|
- RVMT <sup>3</sup>: 710 pHMMs
|
|
@@ -83,7 +83,7 @@ Create a new conda environment and install the dependencies:
|
|
|
83
83
|
```bash
|
|
84
84
|
conda create -n rdrpcatch python=3.12
|
|
85
85
|
conda activate rdrpcatch
|
|
86
|
-
conda install -c bioconda mmseqs2==17.
|
|
86
|
+
conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
|
|
87
87
|
```
|
|
88
88
|
Install the tool from pip:
|
|
89
89
|
```bash
|
|
@@ -98,7 +98,8 @@ rdrpcatch download --destination_dir path/to/store/databases
|
|
|
98
98
|
```
|
|
99
99
|
|
|
100
100
|
* Note 1: The databases are large files and may take some time to download (~ 3 GB).
|
|
101
|
-
* Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
|
|
101
|
+
* Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
|
|
102
|
+
* Note 3: If you encounter an SSL error while downloading, please try again. The error appeared sporadically during testing, and simply restarting the download usually resolves it.
|
|
102
103
|
|
|
103
104
|
## Usage
|
|
104
105
|
RdRpCATCH can be used as a CLI tool as follows:
|
|
@@ -163,7 +164,7 @@ rdrpcatch scan will create a folder with the following structure:
|
|
|
163
164
|
| `{prefix}_rdrpcatch_fasta` | A directory containing the sequences that were identified as RdRp sequences. |
|
|
164
165
|
| `{prefix}_rdrpcatch_plots` | A directory containing the plots generated during the analysis. |
|
|
165
166
|
| `{prefix}_gff_files` | A directory containing the GFF files generated during the analysis. (For now only based on protein sequences) |
|
|
166
|
-
| `tmp` | A directory containing temporary files generated during the analysis. (Only available if the -keep_tmp flag is used |
|
|
167
|
+
| `tmp` | A directory containing temporary files generated during the analysis. (Only available if the -keep_tmp flag is used) |
|
|
167
168
|
|
|
168
169
|
#### Output table fields
|
|
169
170
|
A summary of the results is stored in the `{prefix}_rdrpcatch_output_annotated.tsv` file, which contains the following fields:
|
|
@@ -194,10 +195,10 @@ A summary of the results is stored in the `{prefix}_rdrpcatch_output_annotated.t
|
|
|
194
195
|
## Citations
|
|
195
196
|
Manuscript still in preparation. If you use RdRpCATCH, please cite this GitHub repository.
|
|
196
197
|
A precompiled version of the used databases is available at Zenodo DOI: [10.5281/zenodo.14358348](https://doi.org/10.5281/zenodo.14358348).
|
|
197
|
-
If you use RdRpCATCH, please cite the
|
|
198
|
+
If you use RdRpCATCH, please cite the [underlying third-party databases](#supported-databases):
|
|
198
199
|
|
|
199
200
|
## Acknowledgements
|
|
200
|
-
RdRpCATCH is a collaborative effort and we would like to thank all the authors and developers of the
|
|
201
|
+
RdRpCATCH is a collaborative effort and we would like to thank all the authors and developers of the underlying databases.
|
|
201
202
|
|
|
202
203
|
## Contact
|
|
203
204
|
Dimitris Karapliafis (dimitris.karapliafis@wur.nl), potentially via slack/teams or an issue in the main repo.
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rdrpcatch/rdrpcatch_wrapper.py,sha256=
|
|
2
|
+
rdrpcatch/rdrpcatch_wrapper.py,sha256=HrC2IqxLYFTroJwH1gDcU1QQU0gzs4YfBDzIN64Zc0A,30517
|
|
3
3
|
rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
rdrpcatch/cli/args.py,sha256=2E2gXY42hNasUP94HmPxpgVCA1glk_oN7D5ftbu6W2c,15805
|
|
5
5
|
rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
|
|
7
|
-
rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=
|
|
7
|
+
rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=uah2XPrGNkkeptCv_WWBz_qTn5AtDTfVm6XjbCmNO00,25033
|
|
8
8
|
rdrpcatch/rdrpcatch_scripts/gui.py,sha256=he8kx_4VJWB7SVv9XSQPk0DmkOjEFIg-uGMAtDp3t-w,10576
|
|
9
9
|
rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py,sha256=bwzuCxu8nHQ5OC0Yr5Lyvhcyk9OWjuamInqe0T0lc38,3809
|
|
10
|
-
rdrpcatch/rdrpcatch_scripts/paths.py,sha256=
|
|
10
|
+
rdrpcatch/rdrpcatch_scripts/paths.py,sha256=roTZ2QPF4Fii7jtHkS9I6INJg1Vu78Dc_ieQGKjOCP4,4710
|
|
11
11
|
rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LYWqwcJI,5938
|
|
12
12
|
rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
|
|
13
13
|
rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
|
|
14
14
|
rdrpcatch/rdrpcatch_scripts/utils.py,sha256=Wx1GXhAPBfJw7x67sOu7WclZzMo0N3O-hxNYTVxc3v4,16780
|
|
15
|
-
rdrpcatch-0.0.
|
|
16
|
-
rdrpcatch-0.0.
|
|
17
|
-
rdrpcatch-0.0.
|
|
18
|
-
rdrpcatch-0.0.
|
|
19
|
-
rdrpcatch-0.0.
|
|
15
|
+
rdrpcatch-0.0.2.dist-info/METADATA,sha256=JtS1E35XYcWSNu98shFxcpirpz021Ja4pxjjx0Izyz0,14000
|
|
16
|
+
rdrpcatch-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
17
|
+
rdrpcatch-0.0.2.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
|
|
18
|
+
rdrpcatch-0.0.2.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
|
|
19
|
+
rdrpcatch-0.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|