rdrpcatch 0.0.5__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.idea/workspace.xml +22 -17
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/PKG-INFO +1 -2
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/README.md +0 -1
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/pyproject.toml +1 -1
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/cli/args.py +33 -26
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_wrapper.py +54 -44
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.gitignore +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.idea/.gitignore +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.idea/.name +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.idea/ColaB-Scan.iml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.idea/misc.xml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.idea/modules.xml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/.idea/vcs.xml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/LICENSE +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/dependencies/rdrpcatch_test_env.yaml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/dependencies/requirements.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/images/rdrpcatch_flowchart_v0.png +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/meta.yaml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/__init__.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/cli/__init__.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/__init__.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/fetch_dbs.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/gui.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/paths.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/plot.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/run_seqkit.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/rdrpcatch/rdrpcatch_scripts/utils.py +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/meta_4test.yaml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch-1.0.0-py312_2.tar.bz2 +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_env.yaml +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_gff_files/test_translate_full_aminoacid_rdrpcatch.gff3 +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_full_aminoacid_contigs.fasta +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_trimmed_aminoacid_contigs.fasta +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_output_annotated.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_ID_score_plot.html +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_contig_coverage_plot.html +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_evalue_plot.html +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_contig_plot.html +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_plot_profile.html +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_profile_coverage_plot.html +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_score_plot.html +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_upset_plot.png +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_Lucaprot_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RDRP-scan_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RVMT_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_Lucaprot_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RDRP-scan_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RVMT_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/mmseqs_e_search_output/test_translate_mmseqs_e_search.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_lca.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_report +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_aln +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_report +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/tmp/16608414482057878997/easy-taxonomy.sh +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_e_search.log +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_tax.log +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_rdrpcatch.log +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_combined.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_rdrpcatch_output.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_upset_data.tsv +0 -0
- {rdrpcatch-0.0.5 → rdrpcatch-0.0.6}/testing/test_translate.fasta +0 -0
|
@@ -4,7 +4,11 @@
|
|
|
4
4
|
<option name="autoReloadType" value="SELECTIVE" />
|
|
5
5
|
</component>
|
|
6
6
|
<component name="ChangeListManager">
|
|
7
|
-
<list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Updates: Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
|
|
7
|
+
<list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Updates: Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch">
|
|
8
|
+
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
|
|
9
|
+
<change beforePath="$PROJECT_DIR$/rdrpcatch/cli/args.py" beforeDir="false" afterPath="$PROJECT_DIR$/rdrpcatch/cli/args.py" afterDir="false" />
|
|
10
|
+
<change beforePath="$PROJECT_DIR$/rdrpcatch/rdrpcatch_wrapper.py" beforeDir="false" afterPath="$PROJECT_DIR$/rdrpcatch/rdrpcatch_wrapper.py" afterDir="false" />
|
|
11
|
+
</list>
|
|
8
12
|
<option name="SHOW_DIALOG" value="false" />
|
|
9
13
|
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
|
10
14
|
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
|
@@ -36,22 +40,22 @@
|
|
|
36
40
|
<option name="hideEmptyMiddlePackages" value="true" />
|
|
37
41
|
<option name="showLibraryContents" value="true" />
|
|
38
42
|
</component>
|
|
39
|
-
<component name="PropertiesComponent"
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
43
|
+
<component name="PropertiesComponent">{
|
|
44
|
+
"keyToString": {
|
|
45
|
+
"ASKED_ADD_EXTERNAL_FILES": "true",
|
|
46
|
+
"RunOnceActivity.OpenProjectViewOnStart": "true",
|
|
47
|
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
|
48
|
+
"ignore.virus.scanning.warn.message": "true",
|
|
49
|
+
"last_opened_file_path": "C:/Users/karso/PycharmProjects/rdrpcatch_benchmarks",
|
|
50
|
+
"node.js.detected.package.eslint": "true",
|
|
51
|
+
"node.js.detected.package.tslint": "true",
|
|
52
|
+
"node.js.selected.package.eslint": "(autodetect)",
|
|
53
|
+
"node.js.selected.package.tslint": "(autodetect)",
|
|
54
|
+
"nodejs_package_manager_path": "npm",
|
|
55
|
+
"settings.editor.selected.configurable": "preferences.pluginManager",
|
|
56
|
+
"vue.rearranger.settings.migration": "true"
|
|
53
57
|
}
|
|
54
|
-
}
|
|
58
|
+
}</component>
|
|
55
59
|
<component name="RecentsManager">
|
|
56
60
|
<key name="CopyFile.RECENT_KEYS">
|
|
57
61
|
<recent name="C:\Users\karso\PycharmProjects\ColaB-Scan\testing" />
|
|
@@ -116,7 +120,8 @@
|
|
|
116
120
|
<workItem from="1743683517488" duration="7013000" />
|
|
117
121
|
<workItem from="1743714892367" duration="21775000" />
|
|
118
122
|
<workItem from="1744200654491" duration="635000" />
|
|
119
|
-
<workItem from="1744241097621" duration="
|
|
123
|
+
<workItem from="1744241097621" duration="28847000" />
|
|
124
|
+
<workItem from="1745576502650" duration="11360000" />
|
|
120
125
|
</task>
|
|
121
126
|
<task id="LOCAL-00001" summary="First commit: Script for benchmark">
|
|
122
127
|
<option name="closed" value="true" />
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rdrpcatch
|
|
3
|
-
Version: 0.0.5
|
|
3
|
+
Version: 0.0.6
|
|
4
4
|
Dynamic: Summary
|
|
5
5
|
Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
6
6
|
Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
@@ -205,7 +205,6 @@ Dimitris Karapliafis (dimitris.karapliafis@wur.nl), potentially via slack/teams
|
|
|
205
205
|
|
|
206
206
|
##TODO:
|
|
207
207
|
- [ ] loud logging is linking to the utils.py file, not the actual line of code causing the error.
|
|
208
|
-
- [ ] Add `overwrite` flag
|
|
209
208
|
- [ ] drop `db_dir` argument and use global/environment/config variable that is set after running the `download` command
|
|
210
209
|
|
|
211
210
|
|
|
@@ -183,7 +183,6 @@ Dimitris Karapliafis (dimitris.karapliafis@wur.nl), potentially via slack/teams
|
|
|
183
183
|
|
|
184
184
|
##TODO:
|
|
185
185
|
- [ ] loud logging is linking to the utils.py file, not the actual line of code causing the error.
|
|
186
|
-
- [ ] Add `overwrite` flag
|
|
187
186
|
- [ ] drop `db_dir` argument and use global/environment/config variable that is set after running the `download` command
|
|
188
187
|
|
|
189
188
|
|
|
@@ -115,30 +115,30 @@ def cli():
|
|
|
115
115
|
@click.option('-gen_code', '--gen_code',
|
|
116
116
|
type=click.INT,
|
|
117
117
|
default=1,
|
|
118
|
-
help='Genetic code to use for translation. (default: 1) Possible genetic codes (supported by seqkit translate) :
|
|
119
|
-
'2: The Vertebrate Mitochondrial Code
|
|
120
|
-
'3: The Yeast Mitochondrial Code
|
|
121
|
-
'4: The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code
|
|
122
|
-
'5: The Invertebrate Mitochondrial Code
|
|
123
|
-
'6: The Ciliate, Dasycladacean and Hexamita Nuclear Code
|
|
124
|
-
'9: The Echinoderm and Flatworm Mitochondrial Code
|
|
125
|
-
'10: The Euplotid Nuclear Code
|
|
126
|
-
'11: The Bacterial, Archaeal and Plant Plastid Code
|
|
127
|
-
'12: The Alternative Yeast Nuclear Code
|
|
128
|
-
'13: The Ascidian Mitochondrial Code
|
|
129
|
-
'14: The Alternative Flatworm Mitochondrial Code
|
|
130
|
-
'16: Chlorophycean Mitochondrial Code
|
|
131
|
-
'21: Trematode Mitochondrial Code
|
|
132
|
-
'22: Scenedesmus obliquus Mitochondrial Code
|
|
133
|
-
'23: Thraustochytrium Mitochondrial Code
|
|
134
|
-
'24: Pterobranchia Mitochondrial Code
|
|
135
|
-
'25: Candidate Division SR1 and Gracilibacteria Code
|
|
136
|
-
'26: Pachysolen tannophilus Nuclear Code
|
|
137
|
-
'27: Karyorelict Nuclear
|
|
138
|
-
'28: Condylostoma Nuclear
|
|
139
|
-
'29: Mesodinium Nuclear
|
|
140
|
-
'30: Peritrich Nuclear
|
|
141
|
-
'31: Blastocrithidia Nuclear
|
|
118
|
+
help='Genetic code to use for translation. (default: 1) Possible genetic codes (supported by seqkit translate) : 1: The Standard Code, '
|
|
119
|
+
'2: The Vertebrate Mitochondrial Code, '
|
|
120
|
+
'3: The Yeast Mitochondrial Code, '
|
|
121
|
+
'4: The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code, '
|
|
122
|
+
'5: The Invertebrate Mitochondrial Code, '
|
|
123
|
+
'6: The Ciliate, Dasycladacean and Hexamita Nuclear Code, '
|
|
124
|
+
'9: The Echinoderm and Flatworm Mitochondrial Code, '
|
|
125
|
+
'10: The Euplotid Nuclear Code, '
|
|
126
|
+
'11: The Bacterial, Archaeal and Plant Plastid Code, '
|
|
127
|
+
'12: The Alternative Yeast Nuclear Code, '
|
|
128
|
+
'13: The Ascidian Mitochondrial Code, '
|
|
129
|
+
'14: The Alternative Flatworm Mitochondrial Code, '
|
|
130
|
+
'16: Chlorophycean Mitochondrial Code, '
|
|
131
|
+
'21: Trematode Mitochondrial Code, '
|
|
132
|
+
'22: Scenedesmus obliquus Mitochondrial Code, '
|
|
133
|
+
'23: Thraustochytrium Mitochondrial Code, '
|
|
134
|
+
'24: Pterobranchia Mitochondrial Code, '
|
|
135
|
+
'25: Candidate Division SR1 and Gracilibacteria Code, '
|
|
136
|
+
'26: Pachysolen tannophilus Nuclear Code, '
|
|
137
|
+
'27: Karyorelict Nuclear, '
|
|
138
|
+
'28: Condylostoma Nuclear, '
|
|
139
|
+
'29: Mesodinium Nuclear, '
|
|
140
|
+
'30: Peritrich Nuclear, '
|
|
141
|
+
'31: Blastocrithidia Nuclear, ')
|
|
142
142
|
@click.option('-bundle', '--bundle',
|
|
143
143
|
is_flag=True,
|
|
144
144
|
default=False,
|
|
@@ -147,9 +147,14 @@ def cli():
|
|
|
147
147
|
is_flag=True,
|
|
148
148
|
default=False,
|
|
149
149
|
help="Keep temporary files (Expert users) (default: False)")
|
|
150
|
+
@click.option('-overwrite', '--overwrite',
|
|
151
|
+
is_flag=True,
|
|
152
|
+
default=False,
|
|
153
|
+
help="Force overwrite of existing output directory. (default: False)")
|
|
154
|
+
|
|
150
155
|
@click.pass_context
|
|
151
156
|
def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose, evalue,
|
|
152
|
-
incevalue, domevalue, incdomevalue, zvalue, cpus, length_thr, gen_code, bundle, keep_tmp):
|
|
157
|
+
incevalue, domevalue, incdomevalue, zvalue, cpus, length_thr, gen_code, bundle, keep_tmp, overwrite):
|
|
153
158
|
"""Scan sequences for RdRps."""
|
|
154
159
|
|
|
155
160
|
# Create a rich table for displaying parameters
|
|
@@ -175,6 +180,7 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
|
|
|
175
180
|
table.add_row("Genetic Code", str(gen_code))
|
|
176
181
|
table.add_row("Bundle Output", "ON" if bundle else "OFF")
|
|
177
182
|
table.add_row("Save Temporary Files", "ON" if keep_tmp else "OFF")
|
|
183
|
+
table.add_row("Force Overwrite", "ON" if overwrite else "OFF")
|
|
178
184
|
|
|
179
185
|
console.print(Panel(table, title="Scan Configuration"))
|
|
180
186
|
|
|
@@ -207,7 +213,8 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
|
|
|
207
213
|
length_thr=length_thr,
|
|
208
214
|
gen_code=gen_code,
|
|
209
215
|
bundle=bundle,
|
|
210
|
-
keep_tmp=keep_tmp
|
|
216
|
+
keep_tmp=keep_tmp,
|
|
217
|
+
overwrite=overwrite
|
|
211
218
|
)
|
|
212
219
|
|
|
213
220
|
# @cli.command("download", help="Download RdRpCATCH databases.")
|
|
@@ -53,7 +53,7 @@ def bundle_results(output_dir, prefix):
|
|
|
53
53
|
|
|
54
54
|
return archive_path
|
|
55
55
|
|
|
56
|
-
def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,incdomE,domE,incE,z, cpus, length_thr, gen_code, bundle, keep_tmp):
|
|
56
|
+
def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,incdomE,domE,incE,z, cpus, length_thr, gen_code, bundle, keep_tmp, overwrite):
|
|
57
57
|
"""
|
|
58
58
|
Run RdRpCATCH scan.
|
|
59
59
|
|
|
@@ -110,8 +110,16 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
110
110
|
log_file = outputs.log_file
|
|
111
111
|
if not os.path.exists(outputs.output_dir):
|
|
112
112
|
os.makedirs(outputs.output_dir)
|
|
113
|
+
elif os.path.exists(outputs.output_dir) and overwrite:
|
|
114
|
+
# If the output directory already exists and force_overwrite is True, remove the existing directory
|
|
115
|
+
import shutil
|
|
116
|
+
shutil.rmtree(outputs.output_dir)
|
|
117
|
+
os.makedirs(outputs.output_dir)
|
|
118
|
+
outputs = paths.rdrpcatch_output(prefix, Path(output_dir))
|
|
113
119
|
else:
|
|
114
|
-
raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory.")
|
|
120
|
+
raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory"
|
|
121
|
+
f" or activate the -overwrite flag to overwrite the contents of the directory.")
|
|
122
|
+
|
|
115
123
|
if not os.path.exists(outputs.log_dir):
|
|
116
124
|
os.makedirs(outputs.log_dir)
|
|
117
125
|
|
|
@@ -160,6 +168,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
160
168
|
if seq_type == 'prot':
|
|
161
169
|
utils.fasta_checker(input_file, logger).check_seq_length(100000)
|
|
162
170
|
|
|
171
|
+
logger.loud_log("Fetching HMM databases...")
|
|
172
|
+
|
|
163
173
|
## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot
|
|
164
174
|
rvmt_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("RVMT")
|
|
165
175
|
if verbose:
|
|
@@ -234,10 +244,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
234
244
|
|
|
235
245
|
# Fetch mmseqs database
|
|
236
246
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
logger.silent_log("Fetching mmseqs databases.")
|
|
247
|
+
|
|
248
|
+
logger.loud_log("Fetching Mmseqs2 databases...")
|
|
249
|
+
|
|
241
250
|
mmseqs_db_path = fetch_dbs.db_fetcher(db_dir).fetch_mmseqs_db_path("mmseqs_refseq_riboviria_20250211")
|
|
242
251
|
|
|
243
252
|
if verbose:
|
|
@@ -260,21 +269,17 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
260
269
|
if not os.path.exists(outputs.tmp_dir):
|
|
261
270
|
outputs.tmp_dir.mkdir(parents=True)
|
|
262
271
|
|
|
272
|
+
logger.loud_log("Databases fetched successfully.")
|
|
273
|
+
|
|
263
274
|
if seq_type == 'nuc':
|
|
264
|
-
|
|
265
|
-
logger.loud_log("Nucleotide sequence detected.")
|
|
266
|
-
else:
|
|
267
|
-
logger.silent_log("Nucleotide sequence detected.")
|
|
275
|
+
logger.loud_log("Nucleotide sequence detected.")
|
|
268
276
|
|
|
269
277
|
set_dict = {}
|
|
270
278
|
translated_set_dict = {}
|
|
271
279
|
df_list = []
|
|
272
280
|
|
|
273
281
|
## Filter out sequences with length less than 400 bp with seqkit
|
|
274
|
-
|
|
275
|
-
logger.loud_log("Filtering out sequences with length less than 400 bp.")
|
|
276
|
-
else:
|
|
277
|
-
logger.silent_log("Filtering out sequences with length less than 400 bp.")
|
|
282
|
+
logger.loud_log("Filtering out sequences with length less than 400 bp.")
|
|
278
283
|
|
|
279
284
|
if not os.path.exists(outputs.seqkit_seq_output_dir):
|
|
280
285
|
outputs.seqkit_seq_output_dir.mkdir(parents=True)
|
|
@@ -286,10 +291,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
286
291
|
logger.silent_log(f"Filtered sequence written to: { outputs.seqkit_seq_output_path}")
|
|
287
292
|
|
|
288
293
|
## Translate nucleotide sequences to protein sequences with seqkit
|
|
289
|
-
|
|
290
|
-
logger.loud_log("Translating nucleotide sequences to protein sequences.")
|
|
291
|
-
else:
|
|
292
|
-
logger.silent_log("Translating nucleotide sequences to protein sequences.")
|
|
294
|
+
logger.loud_log("Translating nucleotide sequences to protein sequences.")
|
|
293
295
|
|
|
294
296
|
if not os.path.exists(outputs.seqkit_translate_output_dir):
|
|
295
297
|
outputs.seqkit_translate_output_dir.mkdir(parents=True)
|
|
@@ -302,6 +304,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
302
304
|
logger.silent_log(f"Translated sequence written to: {outputs.seqkit_translate_output_path}")
|
|
303
305
|
|
|
304
306
|
for db_name,db_path in zip(db_name_list, db_path_list):
|
|
307
|
+
logger.loud_log(f"Running HMMsearch for {db_name} database.")
|
|
305
308
|
|
|
306
309
|
if verbose:
|
|
307
310
|
logger.loud_log(f"HMM output path: {outputs.hmm_output_path(db_name)}")
|
|
@@ -353,7 +356,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
353
356
|
])
|
|
354
357
|
df_list.append(df)
|
|
355
358
|
|
|
359
|
+
logger.loud_log(f"HMMsearch for {db_name} completed.")
|
|
356
360
|
|
|
361
|
+
logger.loud_log("HMMsearch completed.")
|
|
357
362
|
|
|
358
363
|
if not os.path.exists(outputs.plot_outdir):
|
|
359
364
|
outputs.plot_outdir.mkdir(parents=True)
|
|
@@ -361,6 +366,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
361
366
|
if not os.path.exists(outputs.tsv_outdir):
|
|
362
367
|
outputs.tsv_outdir.mkdir(parents=True)
|
|
363
368
|
|
|
369
|
+
logger.loud_log("Consolidating results.")
|
|
364
370
|
|
|
365
371
|
# Combine all the dataframes in the list
|
|
366
372
|
combined_df = pl.concat(df_list, how='vertical_relaxed')
|
|
@@ -379,6 +385,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
379
385
|
logger.loud_log("No hits found by RdRpCATCH. Exiting.")
|
|
380
386
|
return None
|
|
381
387
|
|
|
388
|
+
# Generate upset plot
|
|
389
|
+
logger.loud_log("Generating plots.")
|
|
382
390
|
|
|
383
391
|
if len(db_name_list) > 1:
|
|
384
392
|
if verbose:
|
|
@@ -411,6 +419,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
411
419
|
combined_set = set.union(*[value for value in set_dict.values()])
|
|
412
420
|
translated_combined_set = set.union(*[value for value in translated_set_dict.values()])
|
|
413
421
|
|
|
422
|
+
logger.loud_log("Extracting RdRp contigs from the input file.")
|
|
423
|
+
|
|
414
424
|
# Write a fasta file with all the contigs
|
|
415
425
|
if not os.path.exists(outputs.fasta_output_dir):
|
|
416
426
|
outputs.fasta_output_dir.mkdir(parents=True)
|
|
@@ -430,25 +440,21 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
430
440
|
if verbose:
|
|
431
441
|
logger.loud_log(f"Contigs written to: {outputs.fasta_nuc_out_path}")
|
|
432
442
|
logger.loud_log(f"Translated contigs written to: {outputs.fasta_prot_out_path}")
|
|
443
|
+
logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
433
444
|
else:
|
|
434
445
|
logger.silent_log(f"Contigs written to: {outputs.fasta_nuc_out_path}")
|
|
435
446
|
logger.silent_log(f"Translated contigs written to: {outputs.fasta_prot_out_path}")
|
|
447
|
+
logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
436
448
|
|
|
437
449
|
if not os.path.exists(outputs.mmseqs_tax_output_dir):
|
|
438
450
|
outputs.mmseqs_tax_output_dir.mkdir(parents=True)
|
|
439
451
|
|
|
440
|
-
|
|
441
|
-
logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
442
|
-
else:
|
|
443
|
-
logger.silent_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
452
|
+
logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
444
453
|
|
|
445
454
|
mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_tax_output_prefix,
|
|
446
455
|
outputs.mmseqs_tax_output_dir, 7, cpus, outputs.mmseqs_tax_log_path).run_mmseqs_easy_tax_lca()
|
|
447
456
|
|
|
448
|
-
|
|
449
|
-
logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
450
|
-
else:
|
|
451
|
-
logger.silent_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
457
|
+
logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
452
458
|
|
|
453
459
|
if not os.path.exists(outputs.mmseqs_e_search_output_dir):
|
|
454
460
|
outputs.mmseqs_e_search_output_dir.mkdir(parents=True)
|
|
@@ -460,18 +466,17 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
460
466
|
utils.mmseqs_parser(outputs.mmseqs_tax_output_lca_path, outputs.mmseqs_e_search_output_path).tax_to_rdrpcatch(
|
|
461
467
|
outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
|
|
462
468
|
|
|
469
|
+
logger.loud_log("Taxonomic annotation completed.")
|
|
463
470
|
|
|
464
471
|
elif seq_type == 'prot':
|
|
465
472
|
|
|
466
|
-
|
|
467
|
-
logger.loud_log("Protein sequence detected.")
|
|
468
|
-
else:
|
|
469
|
-
logger.silent_log("Protein sequence detected.")
|
|
473
|
+
logger.loud_log("Protein sequence detected.")
|
|
470
474
|
|
|
471
475
|
set_dict = {}
|
|
472
476
|
df_list = []
|
|
473
477
|
|
|
474
478
|
for db_name,db_path in zip (db_name_list, db_path_list):
|
|
479
|
+
logger.loud_log(f"Running HMMsearch for {db_name} database.")
|
|
475
480
|
|
|
476
481
|
if verbose:
|
|
477
482
|
logger.loud_log(f"HMM output path: {outputs.hmm_output_path(db_name)}")
|
|
@@ -519,12 +524,17 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
519
524
|
])
|
|
520
525
|
df_list.append(df)
|
|
521
526
|
|
|
527
|
+
logger.loud_log(f"HMMsearch for {db_name} completed.")
|
|
528
|
+
|
|
529
|
+
logger.loud_log("HMMsearch completed.")
|
|
530
|
+
|
|
522
531
|
if not os.path.exists(outputs.plot_outdir):
|
|
523
532
|
outputs.plot_outdir.mkdir(parents=True)
|
|
524
533
|
|
|
525
534
|
if not os.path.exists(outputs.tsv_outdir):
|
|
526
535
|
outputs.tsv_outdir.mkdir(parents=True)
|
|
527
536
|
|
|
537
|
+
logger.loud_log("Consolidating results.")
|
|
528
538
|
|
|
529
539
|
# Combine all the dataframes in the list
|
|
530
540
|
combined_df = pl.concat(df_list, how='vertical_relaxed')
|
|
@@ -542,6 +552,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
542
552
|
logger.loud_log("No hits found by RdRpCATCH. Exiting.")
|
|
543
553
|
return None
|
|
544
554
|
|
|
555
|
+
# Generate upset plot
|
|
556
|
+
logger.loud_log("Generating plots.")
|
|
557
|
+
|
|
545
558
|
if len(db_name_list) > 1:
|
|
546
559
|
if verbose:
|
|
547
560
|
logger.loud_log("Generating upset plot.")
|
|
@@ -574,6 +587,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
574
587
|
# Extract all the contigs
|
|
575
588
|
combined_set = set.union(*[value for value in set_dict.values()])
|
|
576
589
|
# Write a fasta file with all the contigs
|
|
590
|
+
|
|
591
|
+
logger.loud_log("Extracting RdRp contigs from the input file.")
|
|
592
|
+
|
|
577
593
|
if not os.path.exists(outputs.fasta_output_dir):
|
|
578
594
|
outputs.fasta_output_dir.mkdir(parents=True)
|
|
579
595
|
|
|
@@ -600,11 +616,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
600
616
|
if not os.path.exists(outputs.mmseqs_tax_output_dir):
|
|
601
617
|
outputs.mmseqs_tax_output_dir.mkdir(parents=True)
|
|
602
618
|
|
|
603
|
-
|
|
604
|
-
logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
605
|
-
else:
|
|
606
|
-
logger.silent_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
607
|
-
|
|
619
|
+
logger.loud_log("Running mmseqs easy-taxonomy for taxonomic annotation.")
|
|
608
620
|
|
|
609
621
|
mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_tax_output_prefix,
|
|
610
622
|
outputs.mmseqs_tax_output_dir, 7, cpus, outputs.mmseqs_tax_log_path).run_mmseqs_easy_tax_lca()
|
|
@@ -612,10 +624,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
612
624
|
if not os.path.exists(outputs.mmseqs_e_search_output_dir):
|
|
613
625
|
outputs.mmseqs_e_search_output_dir.mkdir(parents=True)
|
|
614
626
|
|
|
615
|
-
|
|
616
|
-
logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
617
|
-
else:
|
|
618
|
-
logger.silent_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
627
|
+
logger.loud_log("Running mmseqs easy-search for taxonomic annotation.")
|
|
619
628
|
|
|
620
629
|
mmseqs_tax.mmseqs(outputs.fasta_prot_out_path, mmseqs_db_path, outputs.mmseqs_e_search_output_dir,
|
|
621
630
|
outputs.mmseqs_e_search_output_path, 7, cpus, outputs.mmseqs_e_search_log_path).run_mmseqs_e_search()
|
|
@@ -624,11 +633,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
624
633
|
outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
|
|
625
634
|
|
|
626
635
|
|
|
627
|
-
|
|
628
|
-
if verbose:
|
|
629
|
-
logger.loud_log(f"Total Runtime: {end_time}")
|
|
630
|
-
else:
|
|
631
|
-
logger.silent_log(f"Total Runtime: {end_time}")
|
|
636
|
+
|
|
632
637
|
|
|
633
638
|
|
|
634
639
|
|
|
@@ -657,6 +662,11 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
657
662
|
else:
|
|
658
663
|
logger.silent_log(f"Results bundled into: {archive_path}")
|
|
659
664
|
|
|
665
|
+
end_time = logger.stop_timer(start_time, verbose)
|
|
666
|
+
|
|
667
|
+
logger.loud_log(f"Total Runtime: {end_time}")
|
|
668
|
+
|
|
669
|
+
logger.loud_log("RdRpCATCH completed successfully.")
|
|
660
670
|
|
|
661
671
|
|
|
662
672
|
return outputs.extended_rdrpcatch_output
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|