rdrpcatch 0.0.7__tar.gz → 0.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.idea/workspace.xml +95 -30
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/PKG-INFO +92 -32
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/README.md +91 -31
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/meta.yaml +3 -3
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/pyproject.toml +1 -1
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/cli/args.py +38 -49
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/fetch_dbs.py +13 -9
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_wrapper.py +41 -7
- rdrpcatch-0.0.7/.idea/shelf/Uncommitted_changes_before_Update_at_19_05_2025_13_00_[Changes]/shelved.patch +0 -145
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.gitignore +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.idea/.gitignore +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.idea/.name +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.idea/ColaB-Scan.iml +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.idea/misc.xml +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.idea/modules.xml +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/.idea/vcs.xml +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/LICENSE +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/dependencies/rdrpcatch_test_env.yaml +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/dependencies/requirements.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/images/rdrpcatch_flowchart_v0.png +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/images/rdrpcatch_illustration.png +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/__init__.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/cli/__init__.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/__init__.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/gui.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/paths.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/plot.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/run_seqkit.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/rdrpcatch/rdrpcatch_scripts/utils.py +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/meta_4test.yaml +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch-1.0.0-py312_2.tar.bz2 +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_env.yaml +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_gff_files/test_translate_full_aminoacid_rdrpcatch.gff3 +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_full_aminoacid_contigs.fasta +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_trimmed_aminoacid_contigs.fasta +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_output_annotated.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_ID_score_plot.html +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_contig_coverage_plot.html +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_evalue_plot.html +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_contig_plot.html +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_plot_profile.html +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_profile_coverage_plot.html +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_score_plot.html +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_upset_plot.png +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_Lucaprot_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RDRP-scan_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RVMT_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_Lucaprot_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RDRP-scan_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RVMT_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/mmseqs_e_search_output/test_translate_mmseqs_e_search.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_lca.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_report +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_aln +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_report +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/tmp/16608414482057878997/easy-taxonomy.sh +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_e_search.log +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_tax.log +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_rdrpcatch.log +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_combined.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_rdrpcatch_output.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_upset_data.tsv +0 -0
- {rdrpcatch-0.0.7 → rdrpcatch-0.0.9}/testing/test_translate.fasta +0 -0
|
@@ -4,11 +4,7 @@
|
|
|
4
4
|
<option name="autoReloadType" value="SELECTIVE" />
|
|
5
5
|
</component>
|
|
6
6
|
<component name="ChangeListManager">
|
|
7
|
-
<list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="
|
|
8
|
-
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
|
|
9
|
-
<change beforePath="$PROJECT_DIR$/rdrpcatch/cli/args.py" beforeDir="false" afterPath="$PROJECT_DIR$/rdrpcatch/cli/args.py" afterDir="false" />
|
|
10
|
-
<change beforePath="$PROJECT_DIR$/rdrpcatch/rdrpcatch_wrapper.py" beforeDir="false" afterPath="$PROJECT_DIR$/rdrpcatch/rdrpcatch_wrapper.py" afterDir="false" />
|
|
11
|
-
</list>
|
|
7
|
+
<list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Bug fixes" />
|
|
12
8
|
<option name="SHOW_DIALOG" value="false" />
|
|
13
9
|
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
|
14
10
|
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
|
@@ -40,22 +36,22 @@
|
|
|
40
36
|
<option name="hideEmptyMiddlePackages" value="true" />
|
|
41
37
|
<option name="showLibraryContents" value="true" />
|
|
42
38
|
</component>
|
|
43
|
-
<component name="PropertiesComponent"
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
39
|
+
<component name="PropertiesComponent">{
|
|
40
|
+
"keyToString": {
|
|
41
|
+
"ASKED_ADD_EXTERNAL_FILES": "true",
|
|
42
|
+
"RunOnceActivity.OpenProjectViewOnStart": "true",
|
|
43
|
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
|
44
|
+
"ignore.virus.scanning.warn.message": "true",
|
|
45
|
+
"last_opened_file_path": "C:/Users/karso/PycharmProjects/bioconda_recipes",
|
|
46
|
+
"node.js.detected.package.eslint": "true",
|
|
47
|
+
"node.js.detected.package.tslint": "true",
|
|
48
|
+
"node.js.selected.package.eslint": "(autodetect)",
|
|
49
|
+
"node.js.selected.package.tslint": "(autodetect)",
|
|
50
|
+
"nodejs_package_manager_path": "npm",
|
|
51
|
+
"settings.editor.selected.configurable": "preferences.pluginManager",
|
|
52
|
+
"vue.rearranger.settings.migration": "true"
|
|
57
53
|
}
|
|
58
|
-
}
|
|
54
|
+
}</component>
|
|
59
55
|
<component name="RecentsManager">
|
|
60
56
|
<key name="CopyFile.RECENT_KEYS">
|
|
61
57
|
<recent name="C:\Users\karso\PycharmProjects\ColaB-Scan\testing" />
|
|
@@ -124,7 +120,20 @@
|
|
|
124
120
|
<workItem from="1745576502650" duration="11691000" />
|
|
125
121
|
<workItem from="1746005454102" duration="1271000" />
|
|
126
122
|
<workItem from="1746359600096" duration="3517000" />
|
|
127
|
-
<workItem from="1747128382581" duration="
|
|
123
|
+
<workItem from="1747128382581" duration="13158000" />
|
|
124
|
+
<workItem from="1747982429311" duration="667000" />
|
|
125
|
+
<workItem from="1749116182040" duration="4263000" />
|
|
126
|
+
<workItem from="1749714727345" duration="626000" />
|
|
127
|
+
<workItem from="1750247217114" duration="128000" />
|
|
128
|
+
<workItem from="1750854909876" duration="599000" />
|
|
129
|
+
<workItem from="1751007640706" duration="596000" />
|
|
130
|
+
<workItem from="1751535404721" duration="19179000" />
|
|
131
|
+
<workItem from="1751979275998" duration="99000" />
|
|
132
|
+
<workItem from="1752554982335" duration="3354000" />
|
|
133
|
+
<workItem from="1752590986032" duration="642000" />
|
|
134
|
+
<workItem from="1753197756503" duration="3070000" />
|
|
135
|
+
<workItem from="1755379572482" duration="1765000" />
|
|
136
|
+
<workItem from="1770303390426" duration="217000" />
|
|
128
137
|
</task>
|
|
129
138
|
<task id="LOCAL-00001" summary="First commit: Script for benchmark">
|
|
130
139
|
<option name="closed" value="true" />
|
|
@@ -382,7 +391,63 @@
|
|
|
382
391
|
<option name="project" value="LOCAL" />
|
|
383
392
|
<updated>1745863445358</updated>
|
|
384
393
|
</task>
|
|
385
|
-
<
|
|
394
|
+
<task id="LOCAL-00033" summary="Updates: add support for Zayed_HMM database. change name of Lucaprot db to Lucaprot_HMM">
|
|
395
|
+
<option name="closed" value="true" />
|
|
396
|
+
<created>1747652478191</created>
|
|
397
|
+
<option name="number" value="00033" />
|
|
398
|
+
<option name="presentableId" value="LOCAL-00033" />
|
|
399
|
+
<option name="project" value="LOCAL" />
|
|
400
|
+
<updated>1747652478191</updated>
|
|
401
|
+
</task>
|
|
402
|
+
<task id="LOCAL-00034" summary="Updates: Update .toml">
|
|
403
|
+
<option name="closed" value="true" />
|
|
404
|
+
<created>1747652535824</created>
|
|
405
|
+
<option name="number" value="00034" />
|
|
406
|
+
<option name="presentableId" value="LOCAL-00034" />
|
|
407
|
+
<option name="project" value="LOCAL" />
|
|
408
|
+
<updated>1747652535824</updated>
|
|
409
|
+
</task>
|
|
410
|
+
<task id="LOCAL-00035" summary="Updates: Command line argument parsing without underscore characters Module download renamed to databases Implementation of integration of custom databases Fixed bug with reading the directories of Lucaprot_HMM and Zayed_HMM Update .toml">
|
|
411
|
+
<option name="closed" value="true" />
|
|
412
|
+
<created>1751587387896</created>
|
|
413
|
+
<option name="number" value="00035" />
|
|
414
|
+
<option name="presentableId" value="LOCAL-00035" />
|
|
415
|
+
<option name="project" value="LOCAL" />
|
|
416
|
+
<updated>1751587387896</updated>
|
|
417
|
+
</task>
|
|
418
|
+
<task id="LOCAL-00036" summary="Polish README.md">
|
|
419
|
+
<option name="closed" value="true" />
|
|
420
|
+
<created>1751587669980</created>
|
|
421
|
+
<option name="number" value="00036" />
|
|
422
|
+
<option name="presentableId" value="LOCAL-00036" />
|
|
423
|
+
<option name="project" value="LOCAL" />
|
|
424
|
+
<updated>1751587669980</updated>
|
|
425
|
+
</task>
|
|
426
|
+
<task id="LOCAL-00037" summary="Polish meta.yml">
|
|
427
|
+
<option name="closed" value="true" />
|
|
428
|
+
<created>1751623835098</created>
|
|
429
|
+
<option name="number" value="00037" />
|
|
430
|
+
<option name="presentableId" value="LOCAL-00037" />
|
|
431
|
+
<option name="project" value="LOCAL" />
|
|
432
|
+
<updated>1751623835098</updated>
|
|
433
|
+
</task>
|
|
434
|
+
<task id="LOCAL-00038" summary="Update rdrpcatch to version 0.0.8">
|
|
435
|
+
<option name="closed" value="true" />
|
|
436
|
+
<created>1751625641622</created>
|
|
437
|
+
<option name="number" value="00038" />
|
|
438
|
+
<option name="presentableId" value="LOCAL-00038" />
|
|
439
|
+
<option name="project" value="LOCAL" />
|
|
440
|
+
<updated>1751625641622</updated>
|
|
441
|
+
</task>
|
|
442
|
+
<task id="LOCAL-00039" summary="Bug fixes">
|
|
443
|
+
<option name="closed" value="true" />
|
|
444
|
+
<created>1755380495287</created>
|
|
445
|
+
<option name="number" value="00039" />
|
|
446
|
+
<option name="presentableId" value="LOCAL-00039" />
|
|
447
|
+
<option name="project" value="LOCAL" />
|
|
448
|
+
<updated>1755380495287</updated>
|
|
449
|
+
</task>
|
|
450
|
+
<option name="localTasksCounter" value="40" />
|
|
386
451
|
<servers />
|
|
387
452
|
</component>
|
|
388
453
|
<component name="TypeScriptGeneratedFilesManager">
|
|
@@ -400,13 +465,6 @@
|
|
|
400
465
|
</option>
|
|
401
466
|
</component>
|
|
402
467
|
<component name="VcsManagerConfiguration">
|
|
403
|
-
<MESSAGE value="Commit: File name change" />
|
|
404
|
-
<MESSAGE value="Commit: Upload script and results" />
|
|
405
|
-
<MESSAGE value="Upload Jupyter notebooks and their respective documentation" />
|
|
406
|
-
<MESSAGE value="Upload script progress" />
|
|
407
|
-
<MESSAGE value="Script progress: GUI implementation with tkinter Summarised ColabScan output added Improvement of command line arguments General fixes" />
|
|
408
|
-
<MESSAGE value="Script progress: Reimplementation of argument parsing to support 3 modes: Scan, Download and GUI Implementation of runtime calculation Rename seqkit.py to run_seqkit.py" />
|
|
409
|
-
<MESSAGE value="Script progress: Introduce test for Fasta seq length to avoid crushing pyHMMER Add RdRp coordinates to output and Fasta file (only for aminoacid sequences) Correction of scores calculation for coverage Fix scaling of e-value plot (converted to log-scale) Introduce saving intermidiate files to tmp directory" />
|
|
410
468
|
<MESSAGE value="Major refactoring Formatted as a conda package Removal of redundant files and directories Addition of annotation via mmseqs easy-taxonomy and mmseqs easy-search Tidy up output directories Addition of requirements.txt and" />
|
|
411
469
|
<MESSAGE value="Major refactoring Formatted as a conda package Removal of redundant files and directories Addition of annotation via mmseqs easy-taxonomy and mmseqs easy-search Tidy up output directories Addition of requirements.txt and conda .yaml file for dependencies" />
|
|
412
470
|
<MESSAGE value="Updates: Drop GUI Minor fixes" />
|
|
@@ -425,6 +483,13 @@
|
|
|
425
483
|
<MESSAGE value="Updates: Polishing ReadME Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database" />
|
|
426
484
|
<MESSAGE value="Updates: Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
|
|
427
485
|
<MESSAGE value="Updates: Add -overwrite as a flag Add informative progress statements in cli" />
|
|
428
|
-
<
|
|
486
|
+
<MESSAGE value="Updates: add support for Zayed_HMM database. change name of Lucaprot db to Lucaprot_HMM" />
|
|
487
|
+
<MESSAGE value="Updates: Update .toml" />
|
|
488
|
+
<MESSAGE value="Updates: Command line argument parsing without underscore characters Module download renamed to databases Implementation of integration of custom databases Fixed bug with reading the directories of Lucaprot_HMM and Zayed_HMM Update .toml" />
|
|
489
|
+
<MESSAGE value="Polish README.md" />
|
|
490
|
+
<MESSAGE value="Polish meta.yml" />
|
|
491
|
+
<MESSAGE value="Update rdrpcatch to version 0.0.8" />
|
|
492
|
+
<MESSAGE value="Bug fixes" />
|
|
493
|
+
<option name="LAST_COMMIT_MESSAGE" value="Bug fixes" />
|
|
429
494
|
</component>
|
|
430
495
|
</project>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rdrpcatch
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.9
|
|
4
4
|
Dynamic: Summary
|
|
5
5
|
Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
6
6
|
Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
@@ -34,8 +34,20 @@ library to perform pHMM searches. In addition, the tool scans each sequence (aa
|
|
|
34
34
|
In addition, RdRpCATCH provides information about the number of profiles
|
|
35
35
|
that were positive for each sequence across all pHMM databases, and taxonomic information based on the MMseqs2 easy-taxonomy and search modules against a custom RefSeq Riboviria database.
|
|
36
36
|
|
|
37
|
+
### Version 0.0.7 -> 0.0.8 Changelog
|
|
38
|
+
- Added support for custom pHMM databases. See the [Setting up custom pHMM databases](#setting-up-custom-phmm-databases) section for more information.
|
|
39
|
+
- All specified flags use '-' instead of '_' (e.g. `--db-dir` instead of `--db_dir`).
|
|
40
|
+
- Fixed issue with specifying the Lucaprot_HMM and Zayed_HMM databases in the `--db-options` argument.
|
|
41
|
+
- Command `rdrpcatch download` renamed as `rdrpcatch databases` for clarity, as it now supports adding custom pHMM
|
|
42
|
+
databases to the RdRpCATCH databases. This is facilitated by the `--add-custom-db` argument.
|
|
43
|
+
- Added none option to the `--db-options` argument to search only against custom databases.
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
37
47
|
** The tool has been modified to use [rolypoly](https://code.jgi.doe.gov/UNeri/rolypoly) code/approaches **
|
|
38
48
|
|
|
49
|
+
|
|
50
|
+
|
|
39
51
|

|
|
40
52
|
|
|
41
53
|
### Supported databases
|
|
@@ -43,9 +55,10 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
|
|
|
43
55
|
- NeoRdRp2 <sup>2</sup>: 19394 pHMMs
|
|
44
56
|
- RVMT <sup>3</sup>: 710 pHMMs
|
|
45
57
|
- RdRp-Scan <sup>4</sup> : 68 pHMMs
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
58
|
+
- TSA_Olendraite_fam <sup>5</sup>: 77 pHMMs
|
|
59
|
+
- TSA_Olendraite_gen <sup>6</sup> : 341 pHMMs
|
|
60
|
+
- LucaProt_HMM<sup>7 </sup> : 754 pHMMs
|
|
61
|
+
- Zayed_HMM<sup>8 </sup> : 2489 pHMMs
|
|
49
62
|
|
|
50
63
|
1. Sakaguchi, S. et al. (2022) 'NeoRdRp: A comprehensive dataset for identifying RNA-dependent RNA polymerases of various RNA viruses from metatranscriptomic data', *Microbes and Environments*, 37(3). [doi:10.1264/jsme2.me22001](https://doi.org/10.1264/jsme2.me22001)
|
|
51
64
|
2. Sakaguchi, S., Nakano, T. and Nakagawa, S. (2024) 'Neordrp2 with improved seed data, annotations, and scoring', *Frontiers in Virology*, 4. [doi:10.3389/fviro.2024.1378695](https://doi.org/10.3389/fviro.2024.1378695)
|
|
@@ -53,7 +66,9 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
|
|
|
53
66
|
4. Charon, J. et al. (2022) 'RDRP-Scan: A bioinformatic resource to identify and annotate divergent RNA viruses in metagenomic sequence data', *Virus Evolution*, 8(2). [doi:10.1093/ve/veac082](https://doi.org/10.1093/ve/veac082)
|
|
54
67
|
5. Olendraite, I., Brown, K. and Firth, A.E. (2023) 'Identification of RNA virus–derived rdrp sequences in publicly available transcriptomic data sets', *Molecular Biology and Evolution*, 40(4). [doi:10.1093/molbev/msad060](https://doi.org/10.1093/molbev/msad060)
|
|
55
68
|
6. Olendraite, I. (2021) 'Mining diverse and novel RNA viruses in transcriptomic datasets', Apollo. Available at: [https://www.repository.cam.ac.uk/items/1fabebd2-429b-45c9-b6eb-41d27d0a90c2](https://www.repository.cam.ac.uk/items/1fabebd2-429b-45c9-b6eb-41d27d0a90c2)
|
|
56
|
-
7. Hou, X. et al. (2024) 'Using artificial intelligence to document the hidden RNA virosphere', *Cell*, 187(24). [doi:10.1016/j.cell.2024.09.027](https://doi.org/10.1016/j.cell.2024.09.027)
|
|
69
|
+
7. Hou, X. and He, Y. et al. (2024) 'Using artificial intelligence to document the hidden RNA virosphere', *Cell*, 187(24). [doi:10.1016/j.cell.2024.09.027](https://doi.org/10.1016/j.cell.2024.09.027)
|
|
70
|
+
8. Zayed, A. A., et al. (2022) 'Cryptic and abundant marine viruses at the evolutionary origins of Earth’s RNA virome.' *Science*, 376(6589), 156–162. [doi:10.1126/science.abm5847](https://doi.org/10.1126/science.abm5847)
|
|
71
|
+
|
|
57
72
|
|
|
58
73
|
|
|
59
74
|
## Installation
|
|
@@ -94,13 +109,18 @@ Activate the environment and download the RdRpCATCH databases:
|
|
|
94
109
|
|
|
95
110
|
```bash
|
|
96
111
|
conda activate rdrpcatch
|
|
97
|
-
rdrpcatch
|
|
112
|
+
rdrpcatch databases --destination-dir path/to/store/databases
|
|
98
113
|
```
|
|
99
114
|
|
|
100
115
|
* Note 1: The databases are large files and may take some time to download (~ 3 GB).
|
|
101
116
|
* Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
|
|
102
117
|
* Note 3: If you encounter an SSL error while downloading, please try again. The error seems to appear sporadically during testing, and a simple re-initiation of the downloading process seems to fix it.
|
|
103
|
-
|
|
118
|
+
* Note 4: If the SSL error persists, or there is any other error related to downloading the databases, please let us know by raising an issue. In this case, you can download the pre-compiled databases manually via the zenodo repository
|
|
119
|
+
[10.5281/zenodo.15463729](https://doi.org/10.5281/zenodo.15463729). Save the downloaded archive to a local directory of your preference, and then extract it with the following command:
|
|
120
|
+
```bash
|
|
121
|
+
tar -xvf rdrpcatch_dbs.tar
|
|
122
|
+
```
|
|
123
|
+
|
|
104
124
|
## Usage
|
|
105
125
|
RdRpCATCH can be used as a CLI tool as follows:
|
|
106
126
|
|
|
@@ -109,20 +129,58 @@ RdRpCATCH can be used as a CLI tool as follows:
|
|
|
109
129
|
# conda activate rdrpcatch
|
|
110
130
|
|
|
111
131
|
# scan the input fasta file with the selected databases
|
|
112
|
-
rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -
|
|
132
|
+
rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database
|
|
113
133
|
```
|
|
114
|
-
|
|
134
|
+
|
|
135
|
+
## Input description
|
|
115
136
|
The input file can be one or more nucleotide or protein sequences in multi-fasta format.
|
|
116
137
|
The output directory is where the results will be stored. We recommend specifying the type of the sequence in the command line.
|
|
117
|
-
An optional argument `--
|
|
138
|
+
An optional argument `--seq-type` (nuc or prot) can be used to specify if the input fasta file sequences are nucleotide or amino acid.
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
## Setting up custom pHMM databases
|
|
142
|
+
It is possible to use custom pHMM databases with RdRpCATCH. As a prerequisite, you need to install the RdRpCATCH
|
|
143
|
+
databases using the `rdrpcatch databases` command as described above, to a directory of your choice.
|
|
144
|
+
|
|
145
|
+
The custom databases should be formatted as follows:
|
|
146
|
+
|
|
147
|
+
- First create a directory and give it a descriptive name, e.g. `my_custom_rdrp_database`. Important: The name should not contain comma `,` characters.
|
|
148
|
+
- Inside the directory put your custom pHMM HMMER pressed database. You can use the `hmmpress` command of HMMER to create the pressed database from your custom HMM file. This creates a set of files with the same name as the original HMM file, but with different extensions (e.g. `.h3f`, `.h3i`, `.h3m`, `.h3p`). The directory should contain all these files. Please refer to the HMMER manual for more information on how to create a pressed database from an HMM file. (http://eddylab.org/software/hmmer/Userguide.pdf)
|
|
149
|
+
- Next, you can add the directory to the custom databases that are readable by RdRpCATCH. This can be done by using the `rdrpcatch databases` command as follows:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
rdrpcatch databases --add-custom-db path/to/my_custom_rdrp_database --destination-dir path/that/contains/rdrpcatch/databases
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
- This will add the custom database to the list of databases that can be used with RdRpCATCH.
|
|
156
|
+
- The custom database can then be used with the `rdrpcatch scan` command by specifying the `--custom-dbs` argument as follows:
|
|
157
|
+
-
|
|
158
|
+
```bash
|
|
159
|
+
rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database --custom-dbs custom_database_name
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
- The `custom_database_name` should be the name of the directory that contains the custom pHMM files, without the path.
|
|
163
|
+
- For example, if the custom database is stored in `path/to/my_custom_rdrp_database`, you would use `--custom-dbs my_custom_rdrp_database` in the command line.
|
|
164
|
+
- You can add multiple custom databases by installing them in the same way and specifying them by separating them with commas, e.g. `--custom-dbs my_custom_rdrp_database,another_custom_database`.
|
|
165
|
+
- The custom databases can be used in combination with the pre-compiled databases provided by RdRpCATCH. To do this, you can specify the `--db-options` argument with the names of the pre-compiled databases you want to use, and specify the custom databases with the `--custom-dbs` argument.
|
|
166
|
+
- For example, if you want to use the NeoRdRp and RVMT databases along with your custom database, you would use the following command:
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database --db-options NeoRdRp,RVMT --custom-dbs my_custom_rdrp_database
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
- Note: By default, RdRpCATCH will search against all pre-compiled databases if no `--db-options` argument is specified. If you want to use only the custom databases, you can specify `--db-options none` to avoid searching against the pre-compiled databases.
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
|
|
118
176
|
|
|
119
177
|
## Commands
|
|
120
178
|
The following two commands are available in RdRpCATCH:
|
|
121
179
|
* [`rdrpcatch scan`](#rdrpcatch-scan)
|
|
122
|
-
* [`rdrpcatch
|
|
180
|
+
* [`rdrpcatch databases`](#rdrpcatch-databases)
|
|
123
181
|
|
|
124
|
-
### rdrpcatch
|
|
125
|
-
Command to download pre-compiled databases from Zenodo. If the databases are already downloaded in the specified directory
|
|
182
|
+
### rdrpcatch databases:
|
|
183
|
+
Command to download pre-compiled databases from Zenodo and to set up custom databases. If the databases are already downloaded in the specified directory
|
|
126
184
|
, the command will check for updates and download the latest version if available.
|
|
127
185
|
|
|
128
186
|
| Argument | Short Flag | Type | Description |
|
|
@@ -130,28 +188,30 @@ Command to download pre-compiled databases from Zenodo. If the databases are alr
|
|
|
130
188
|
| `--destination-dir` | `-dest` | PATH | Path to the directory to download HMM databases. [required] |
|
|
131
189
|
| `--concept-doi` | `` | TEXT | Zenodo Concept DOI for database repository |
|
|
132
190
|
| `--help` | `` | | Show help message and exit |
|
|
191
|
+
| `--add-custom-db` | `` | PATH | Path to the directory containing custom pHMM files to add to the RdRpCATCH databases. |
|
|
192
|
+
|
|
133
193
|
### rdrpcatch scan:
|
|
134
194
|
Search a given input using selected RdRp databases.
|
|
135
195
|
|
|
136
|
-
| Argument
|
|
137
|
-
|
|
138
|
-
| `--input`
|
|
139
|
-
| `--output`
|
|
140
|
-
| `--
|
|
141
|
-
| `--
|
|
142
|
-
| `--custom-dbs`
|
|
143
|
-
| `--
|
|
144
|
-
| `--verbose`
|
|
145
|
-
| `--evalue`
|
|
146
|
-
| `--incevalue`
|
|
147
|
-
| `--domevalue`
|
|
148
|
-
| `--incdomevalue` | `-incdomE`
|
|
149
|
-
| `--zvalue`
|
|
150
|
-
| `--cpus`
|
|
151
|
-
| `--
|
|
152
|
-
| `--
|
|
153
|
-
| `--bundle`
|
|
154
|
-
| `--
|
|
196
|
+
| Argument | Short Flag | Type | Description |
|
|
197
|
+
|------------------|---------------|------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|
198
|
+
| `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
|
|
199
|
+
| `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
|
|
200
|
+
| `--db-dir` | `-db-dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
|
|
201
|
+
| `--db-options` | `-dbs` | TEXT | Comma-separated list of pre-installed databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot_HMM, Zayed_HMM, all, none |
|
|
202
|
+
| `--custom-dbs` | | PATH | Comma-separated list of custom databases to search against. Valid options: names of the directories that the custom databases are stored in. |
|
|
203
|
+
| `--seq-type` | `-seq-type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
|
|
204
|
+
| `--verbose` | `-v` | FLAG | Print verbose output. |
|
|
205
|
+
| `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
|
|
206
|
+
| `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
|
|
207
|
+
| `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
|
|
208
|
+
| `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
|
|
209
|
+
| `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
|
|
210
|
+
| `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
|
|
211
|
+
| `--length-thr` | `-length-thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
|
|
212
|
+
| `--gen-code` | `-gen-code` | INTEGER | Genetic code to use for translation. (default: 1) |
|
|
213
|
+
| `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
|
|
214
|
+
| `--keep-tmp` | `-keep-tmp` | | Keep the temporary files generated during the analysis. (default: False) |
|
|
155
215
|
|
|
156
216
|
|
|
157
217
|
|
|
@@ -12,8 +12,20 @@ library to perform pHMM searches. In addition, the tool scans each sequence (aa
|
|
|
12
12
|
In addition, RdRpCATCH provides information about the number of profiles
|
|
13
13
|
that were positive for each sequence across all pHMM databases, and taxonomic information based on the MMseqs2 easy-taxonomy and search modules against a custom RefSeq Riboviria database.
|
|
14
14
|
|
|
15
|
+
### Version 0.0.7 -> 0.0.8 Changelog
|
|
16
|
+
- Added support for custom pHMM databases. See the [Setting up custom pHMM databases](#setting-up-custom-phmm-databases) section for more information.
|
|
17
|
+
- All specified flags use '-' instead of '_' (e.g. `--db-dir` instead of `--db_dir`).
|
|
18
|
+
- Fixed issue with specifying the Lucaprot_HMM and Zayed_HMM databases in the `--db-options` argument.
|
|
19
|
+
- Command `rdrpcatch download` renamed as `rdrpcatch databases` for clarity, as it now supports adding custom pHMM
|
|
20
|
+
databases to the RdRpCATCH databases. This is facilitated by the `--add-custom-db` argument.
|
|
21
|
+
- Added a `none` option to the `--db-options` argument to search only against custom databases.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
15
25
|
**The tool has been modified to use [rolypoly](https://code.jgi.doe.gov/UNeri/rolypoly) code/approaches**
|
|
16
26
|
|
|
27
|
+
|
|
28
|
+
|
|
17
29
|

|
|
18
30
|
|
|
19
31
|
### Supported databases
|
|
@@ -21,9 +33,10 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
|
|
|
21
33
|
- NeoRdRp2 <sup>2</sup>: 19394 pHMMs
|
|
22
34
|
- RVMT <sup>3</sup>: 710 pHMMs
|
|
23
35
|
- RdRp-Scan <sup>4</sup> : 68 pHMMs
|
|
24
|
-
-
|
|
25
|
-
-
|
|
26
|
-
-
|
|
36
|
+
- TSA_Olendraite_fam <sup>5</sup>: 77 pHMMs
|
|
37
|
+
- TSA_Olendraite_gen <sup>6</sup> : 341 pHMMs
|
|
38
|
+
- LucaProt_HMM<sup>7 </sup> : 754 pHMMs
|
|
39
|
+
- Zayed_HMM<sup>8 </sup> : 2489 pHMMs
|
|
27
40
|
|
|
28
41
|
1. Sakaguchi, S. et al. (2022) 'NeoRdRp: A comprehensive dataset for identifying RNA-dependent RNA polymerases of various RNA viruses from metatranscriptomic data', *Microbes and Environments*, 37(3). [doi:10.1264/jsme2.me22001](https://doi.org/10.1264/jsme2.me22001)
|
|
29
42
|
2. Sakaguchi, S., Nakano, T. and Nakagawa, S. (2024) 'Neordrp2 with improved seed data, annotations, and scoring', *Frontiers in Virology*, 4. [doi:10.3389/fviro.2024.1378695](https://doi.org/10.3389/fviro.2024.1378695)
|
|
@@ -31,7 +44,9 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
|
|
|
31
44
|
4. Charon, J. et al. (2022) 'RDRP-Scan: A bioinformatic resource to identify and annotate divergent RNA viruses in metagenomic sequence data', *Virus Evolution*, 8(2). [doi:10.1093/ve/veac082](https://doi.org/10.1093/ve/veac082)
|
|
32
45
|
5. Olendraite, I., Brown, K. and Firth, A.E. (2023) 'Identification of RNA virus–derived rdrp sequences in publicly available transcriptomic data sets', *Molecular Biology and Evolution*, 40(4). [doi:10.1093/molbev/msad060](https://doi.org/10.1093/molbev/msad060)
|
|
33
46
|
6. Olendraite, I. (2021) 'Mining diverse and novel RNA viruses in transcriptomic datasets', Apollo. Available at: [https://www.repository.cam.ac.uk/items/1fabebd2-429b-45c9-b6eb-41d27d0a90c2](https://www.repository.cam.ac.uk/items/1fabebd2-429b-45c9-b6eb-41d27d0a90c2)
|
|
34
|
-
7. Hou, X. et al. (2024) 'Using artificial intelligence to document the hidden RNA virosphere', *Cell*, 187(24). [doi:10.1016/j.cell.2024.09.027](https://doi.org/10.1016/j.cell.2024.09.027)
|
|
47
|
+
7. Hou, X. and He, Y. et al. (2024) 'Using artificial intelligence to document the hidden RNA virosphere', *Cell*, 187(24). [doi:10.1016/j.cell.2024.09.027](https://doi.org/10.1016/j.cell.2024.09.027)
|
|
48
|
+
8. Zayed, A. A., et al. (2022) 'Cryptic and abundant marine viruses at the evolutionary origins of Earth’s RNA virome.' *Science*, 376(6589), 156–162. [doi:10.1126/science.abm5847](https://doi.org/10.1126/science.abm5847)
|
|
49
|
+
|
|
35
50
|
|
|
36
51
|
|
|
37
52
|
## Installation
|
|
@@ -72,13 +87,18 @@ Activate the environment and download the RdRpCATCH databases:
|
|
|
72
87
|
|
|
73
88
|
```bash
|
|
74
89
|
conda activate rdrpcatch
|
|
75
|
-
rdrpcatch
|
|
90
|
+
rdrpcatch databases --destination-dir path/to/store/databases
|
|
76
91
|
```
|
|
77
92
|
|
|
78
93
|
* Note 1: The databases are large files and may take some time to download (~ 3 GB).
|
|
79
94
|
* Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
|
|
80
95
|
* Note 3: If you encounter an SSL error while downloading, please try again. The error seems to appear sporadically during testing, and a simple re-initiation of the downloading process seems to fix it.
|
|
81
|
-
|
|
96
|
+
* Note 4: If the SSL error persists, or there is any other error related to downloading the databases, please let us know by raising an issue. In this case, you can download the pre-compiled databases manually via the Zenodo repository
|
|
97
|
+
[10.5281/zenodo.15463729](https://doi.org/10.5281/zenodo.15463729). Save the repository to a local directory of your preference, and then use the following command:
|
|
98
|
+
```bash
|
|
99
|
+
tar -xvf rdrpcatch_dbs.tar
|
|
100
|
+
```
|
|
101
|
+
|
|
82
102
|
## Usage
|
|
83
103
|
RdRpCATCH can be used as a CLI tool as follows:
|
|
84
104
|
|
|
@@ -87,20 +107,58 @@ RdRpCATCH can be used as a CLI tool as follows:
|
|
|
87
107
|
# conda activate rdrpcatch
|
|
88
108
|
|
|
89
109
|
# scan the input fasta file with the selected databases
|
|
90
|
-
rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -
|
|
110
|
+
rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database
|
|
91
111
|
```
|
|
92
|
-
|
|
112
|
+
|
|
113
|
+
## Input description
|
|
93
114
|
The input file can be one or more nucleotide or protein sequences in multi-fasta format.
|
|
94
115
|
The output directory is where the results will be stored. We recommend specifying the type of the sequence in the command line.
|
|
95
|
-
An optional argument `--
|
|
116
|
+
An optional argument `--seq-type` (nuc or prot) can be used to specify if the input fasta file sequences are nucleotide or amino acid.
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
## Setting up custom pHMM databases
|
|
120
|
+
It is possible to use custom pHMM databases with RdRpCATCH. As a prerequisite, you need to install the RdRpCATCH
|
|
121
|
+
databases using the `rdrpcatch databases` command as described above, to a directory of your choice.
|
|
122
|
+
|
|
123
|
+
The custom databases should be formatted as follows:
|
|
124
|
+
|
|
125
|
+
- First create a directory and give it a descriptive name, e.g. `my_custom_rdrp_database`. Important: The name should not contain comma `,` characters.
|
|
126
|
+
- Inside the directory put your custom pHMM HMMER pressed database. You can use the `hmmpress` command of HMMER to create the pressed database from your custom HMM file. This creates a set of files with the same name as the original HMM file, but with different extensions (e.g. `.h3f`, `.h3i`, `.h3m`, `.h3p`). The directory should contain all these files. Please refer to the HMMER manual for more information on how to create a pressed database from an HMM file. (http://eddylab.org/software/hmmer/Userguide.pdf)
|
|
127
|
+
- Next you can add the directory to the custom databases that are readable by RdRpCATCH. This can be done by using the rdrpcatch databases command as follows:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
rdrpcatch databases --add-custom-db path/to/my_custom_rdrp_database --destination-dir path/that/contains/rdrpcatch/databases
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
- This will add the custom database to the list of databases that can be used with RdRpCATCH.
|
|
134
|
+
- The custom database can then be used with the `rdrpcatch scan` command by specifying the `--custom-dbs` argument as follows:
|
|
135
|
+
-
|
|
136
|
+
```bash
|
|
137
|
+
rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database --custom-dbs custom_database_name
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
- The `custom_database_name` should be the name of the directory that contains the custom pHMM files, without the path.
|
|
141
|
+
- For example, if the custom database is stored in `path/to/my_custom_rdrp_database`, you would use `--custom-dbs my_custom_rdrp_database` in the command line.
|
|
142
|
+
- You can add multiple custom databases by installing them in the same way and specifying them by separating them with commas, e.g. `--custom-dbs my_custom_rdrp_database,another_custom_database`.
|
|
143
|
+
- The custom databases can be used in combination with the pre-compiled databases provided by RdRpCATCH. To do this, you can specify the `--db-options` argument with the names of the pre-compiled databases you want to use, and specify the custom databases with the `--custom-dbs` argument.
|
|
144
|
+
- For example, if you want to use the NeoRdRp and RVMT databases along with your custom database, you would use the following command:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database --db-options NeoRdRp,RVMT --custom-dbs my_custom_rdrp_database
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
- Note: By default, RdRpCATCH will search against all pre-compiled databases if no `--db-options` argument is specified. If you want to use only the custom databases, you can specify `--db-options none` to avoid searching against the pre-compiled databases.
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
96
154
|
|
|
97
155
|
## Commands
|
|
98
156
|
The following two commands are available in RdRpCATCH:
|
|
99
157
|
* [`rdrpcatch scan`](#rdrpcatch-scan)
|
|
100
|
-
* [`rdrpcatch
|
|
158
|
+
* [`rdrpcatch databases`](#rdrpcatch-databases)
|
|
101
159
|
|
|
102
|
-
### rdrpcatch
|
|
103
|
-
Command to download pre-compiled databases from Zenodo. If the databases are already downloaded in the specified directory
|
|
160
|
+
### rdrpcatch databases:
|
|
161
|
+
Command to download pre-compiled databases from Zenodo and to set up custom databases. If the databases are already downloaded in the specified directory
|
|
104
162
|
, the command will check for updates and download the latest version if available.
|
|
105
163
|
|
|
106
164
|
| Argument | Short Flag | Type | Description |
|
|
@@ -108,28 +166,30 @@ Command to download pre-compiled databases from Zenodo. If the databases are alr
|
|
|
108
166
|
| `--destination-dir` | `-dest` | PATH | Path to the directory to download HMM databases. [required] |
|
|
109
167
|
| `--concept-doi` | `` | TEXT | Zenodo Concept DOI for database repository |
|
|
110
168
|
| `--help` | `` | | Show help message and exit |
|
|
169
|
+
| `--add-custom-db` | `` | PATH | Path to the directory containing custom pHMM files to add to the RdRpCATCH databases. |
|
|
170
|
+
|
|
111
171
|
### rdrpcatch scan:
|
|
112
172
|
Search a given input using selected RdRp databases.
|
|
113
173
|
|
|
114
|
-
| Argument
|
|
115
|
-
|
|
116
|
-
| `--input`
|
|
117
|
-
| `--output`
|
|
118
|
-
| `--
|
|
119
|
-
| `--
|
|
120
|
-
| `--custom-dbs`
|
|
121
|
-
| `--
|
|
122
|
-
| `--verbose`
|
|
123
|
-
| `--evalue`
|
|
124
|
-
| `--incevalue`
|
|
125
|
-
| `--domevalue`
|
|
126
|
-
| `--incdomevalue` | `-incdomE`
|
|
127
|
-
| `--zvalue`
|
|
128
|
-
| `--cpus`
|
|
129
|
-
| `--
|
|
130
|
-
| `--
|
|
131
|
-
| `--bundle`
|
|
132
|
-
| `--
|
|
174
|
+
| Argument | Short Flag | Type | Description |
|
|
175
|
+
|------------------|---------------|------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|
176
|
+
| `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
|
|
177
|
+
| `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
|
|
178
|
+
| `--db-dir` | `-db-dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
|
|
179
|
+
| `--db-options` | `-dbs` | TEXT | Comma-separated list of pre-installed databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot_HMM, Zayed_HMM, all, none |
|
|
180
|
+
| `--custom-dbs` | | PATH | Comma-separated list of custom databases to search against. Valid options: names of the directories that the custom databases are stored in. |
|
|
181
|
+
| `--seq-type` | `-seq-type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
|
|
182
|
+
| `--verbose` | `-v` | FLAG | Print verbose output. |
|
|
183
|
+
| `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
|
|
184
|
+
| `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
|
|
185
|
+
| `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
|
|
186
|
+
| `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
|
|
187
|
+
| `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
|
|
188
|
+
| `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
|
|
189
|
+
| `--length-thr` | `-length-thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
|
|
190
|
+
| `--gen-code` | `-gen-code` | INTEGER | Genetic code to use for translation. (default: 1) |
|
|
191
|
+
| `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
|
|
192
|
+
| `--keep-tmp` | `-keep-tmp` | | Keep the temporary files generated during the analysis. (default: False) |
|
|
133
193
|
|
|
134
194
|
|
|
135
195
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{% set name = "RdRpCATCH" %}
|
|
2
|
-
{% set version = "0.0.
|
|
3
|
-
{% set sha256 = "
|
|
2
|
+
{% set version = "0.0.9" %}
|
|
3
|
+
{% set sha256 = "a4e6015fbcd74df5050ac2ee94a3c2a2b2587b3d167f40742b3f2e782ab06511" %}
|
|
4
4
|
|
|
5
5
|
package:
|
|
6
6
|
name: {{ name|lower }}
|
|
@@ -42,7 +42,7 @@ test:
|
|
|
42
42
|
commands:
|
|
43
43
|
- rdrpcatch --help
|
|
44
44
|
- rdrpcatch scan --help
|
|
45
|
-
- rdrpcatch
|
|
45
|
+
- rdrpcatch databases --help
|
|
46
46
|
|
|
47
47
|
about:
|
|
48
48
|
home: https://github.com/dimitris-karapliafis/RdRpCATCH
|