rdrpcatch 0.0.6__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.idea/workspace.xml +69 -13
  2. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/PKG-INFO +89 -31
  3. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/README.md +88 -30
  4. rdrpcatch-0.0.8/images/rdrpcatch_illustration.png +0 -0
  5. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/pyproject.toml +1 -1
  6. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/cli/args.py +36 -47
  7. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/fetch_dbs.py +13 -9
  8. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_wrapper.py +54 -12
  9. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.gitignore +0 -0
  10. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.idea/.gitignore +0 -0
  11. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.idea/.name +0 -0
  12. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.idea/ColaB-Scan.iml +0 -0
  13. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.idea/inspectionProfiles/profiles_settings.xml +0 -0
  14. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.idea/misc.xml +0 -0
  15. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.idea/modules.xml +0 -0
  16. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/.idea/vcs.xml +0 -0
  17. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/LICENSE +0 -0
  18. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/dependencies/rdrpcatch_test_env.yaml +0 -0
  19. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/dependencies/requirements.txt +0 -0
  20. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/images/rdrpcatch_flowchart_v0.png +0 -0
  21. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/meta.yaml +0 -0
  22. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/__init__.py +0 -0
  23. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/cli/__init__.py +0 -0
  24. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/__init__.py +0 -0
  25. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py +0 -0
  26. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/gui.py +0 -0
  27. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py +0 -0
  28. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/paths.py +0 -0
  29. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/plot.py +0 -0
  30. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py +0 -0
  31. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/run_seqkit.py +0 -0
  32. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/rdrpcatch/rdrpcatch_scripts/utils.py +0 -0
  33. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/meta_4test.yaml +0 -0
  34. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch-1.0.0-py312_2.tar.bz2 +0 -0
  35. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_env.yaml +0 -0
  36. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_gff_files/test_translate_full_aminoacid_rdrpcatch.gff3 +0 -0
  37. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_full_aminoacid_contigs.fasta +0 -0
  38. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_trimmed_aminoacid_contigs.fasta +0 -0
  39. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_output_annotated.tsv +0 -0
  40. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_ID_score_plot.html +0 -0
  41. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_contig_coverage_plot.html +0 -0
  42. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_evalue_plot.html +0 -0
  43. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_contig_plot.html +0 -0
  44. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_plot_profile.html +0 -0
  45. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_profile_coverage_plot.html +0 -0
  46. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_score_plot.html +0 -0
  47. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_upset_plot.png +0 -0
  48. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_Lucaprot_hmm_output_best_hit.txt +0 -0
  49. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_best_hit.txt +0 -0
  50. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp_hmm_output_best_hit.txt +0 -0
  51. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RDRP-scan_hmm_output_best_hit.txt +0 -0
  52. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RVMT_hmm_output_best_hit.txt +0 -0
  53. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_best_hit.txt +0 -0
  54. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_best_hit.txt +0 -0
  55. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_Lucaprot_hmm_output_formatted.txt +0 -0
  56. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_formatted.txt +0 -0
  57. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp_hmm_output_formatted.txt +0 -0
  58. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RDRP-scan_hmm_output_formatted.txt +0 -0
  59. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RVMT_hmm_output_formatted.txt +0 -0
  60. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_formatted.txt +0 -0
  61. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_formatted.txt +0 -0
  62. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.custom.tsv +0 -0
  63. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.txt +0 -0
  64. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.custom.tsv +0 -0
  65. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.txt +0 -0
  66. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.custom.tsv +0 -0
  67. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.txt +0 -0
  68. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.custom.tsv +0 -0
  69. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.txt +0 -0
  70. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.custom.tsv +0 -0
  71. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.txt +0 -0
  72. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.custom.tsv +0 -0
  73. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.txt +0 -0
  74. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.custom.tsv +0 -0
  75. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.txt +0 -0
  76. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/mmseqs_e_search_output/test_translate_mmseqs_e_search.tsv +0 -0
  77. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_lca.tsv +0 -0
  78. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_report +0 -0
  79. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_aln +0 -0
  80. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_report +0 -0
  81. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/tmp/16608414482057878997/easy-taxonomy.sh +0 -0
  82. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_e_search.log +0 -0
  83. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_tax.log +0 -0
  84. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_rdrpcatch.log +0 -0
  85. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_combined.tsv +0 -0
  86. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_rdrpcatch_output.tsv +0 -0
  87. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_upset_data.tsv +0 -0
  88. {rdrpcatch-0.0.6 → rdrpcatch-0.0.8}/testing/test_translate.fasta +0 -0
@@ -4,10 +4,8 @@
4
4
  <option name="autoReloadType" value="SELECTIVE" />
5
5
  </component>
6
6
  <component name="ChangeListManager">
7
- <list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Updates:&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch">
8
- <change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
9
- <change beforePath="$PROJECT_DIR$/rdrpcatch/cli/args.py" beforeDir="false" afterPath="$PROJECT_DIR$/rdrpcatch/cli/args.py" afterDir="false" />
10
- <change beforePath="$PROJECT_DIR$/rdrpcatch/rdrpcatch_wrapper.py" beforeDir="false" afterPath="$PROJECT_DIR$/rdrpcatch/rdrpcatch_wrapper.py" afterDir="false" />
7
+ <list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Polish README.md">
8
+ <change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
11
9
  </list>
12
10
  <option name="SHOW_DIALOG" value="false" />
13
11
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -46,7 +44,7 @@
46
44
  &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
47
45
  &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
48
46
  &quot;ignore.virus.scanning.warn.message&quot;: &quot;true&quot;,
49
- &quot;last_opened_file_path&quot;: &quot;C:/Users/karso/PycharmProjects/rdrpcatch_benchmarks&quot;,
47
+ &quot;last_opened_file_path&quot;: &quot;C:/Users/karso/PycharmProjects/testing_approaches&quot;,
50
48
  &quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
51
49
  &quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
52
50
  &quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
@@ -121,7 +119,17 @@
121
119
  <workItem from="1743714892367" duration="21775000" />
122
120
  <workItem from="1744200654491" duration="635000" />
123
121
  <workItem from="1744241097621" duration="28847000" />
124
- <workItem from="1745576502650" duration="11360000" />
122
+ <workItem from="1745576502650" duration="11691000" />
123
+ <workItem from="1746005454102" duration="1271000" />
124
+ <workItem from="1746359600096" duration="3517000" />
125
+ <workItem from="1747128382581" duration="13158000" />
126
+ <workItem from="1747982429311" duration="667000" />
127
+ <workItem from="1749116182040" duration="4263000" />
128
+ <workItem from="1749714727345" duration="626000" />
129
+ <workItem from="1750247217114" duration="128000" />
130
+ <workItem from="1750854909876" duration="599000" />
131
+ <workItem from="1751007640706" duration="596000" />
132
+ <workItem from="1751535404721" duration="16934000" />
125
133
  </task>
126
134
  <task id="LOCAL-00001" summary="First commit: Script for benchmark">
127
135
  <option name="closed" value="true" />
@@ -363,7 +371,55 @@
363
371
  <option name="project" value="LOCAL" />
364
372
  <updated>1744796108058</updated>
365
373
  </task>
366
- <option name="localTasksCounter" value="31" />
374
+ <task id="LOCAL-00031" summary="Updates:&#10;Add -overwrite as a flag&#10;Add informative progress statements in cli">
375
+ <option name="closed" value="true" />
376
+ <created>1745863439863</created>
377
+ <option name="number" value="00031" />
378
+ <option name="presentableId" value="LOCAL-00031" />
379
+ <option name="project" value="LOCAL" />
380
+ <updated>1745863439863</updated>
381
+ </task>
382
+ <task id="LOCAL-00032" summary="Updates:&#10;Add -overwrite as a flag&#10;Add informative progress statements in cli">
383
+ <option name="closed" value="true" />
384
+ <created>1745863445358</created>
385
+ <option name="number" value="00032" />
386
+ <option name="presentableId" value="LOCAL-00032" />
387
+ <option name="project" value="LOCAL" />
388
+ <updated>1745863445358</updated>
389
+ </task>
390
+ <task id="LOCAL-00033" summary="Updates:&#10;add support for Zayed_HMM database.&#10;change name of Lucaprot db to Lucaprot_HMM">
391
+ <option name="closed" value="true" />
392
+ <created>1747652478191</created>
393
+ <option name="number" value="00033" />
394
+ <option name="presentableId" value="LOCAL-00033" />
395
+ <option name="project" value="LOCAL" />
396
+ <updated>1747652478191</updated>
397
+ </task>
398
+ <task id="LOCAL-00034" summary="Updates:&#10;Update .toml">
399
+ <option name="closed" value="true" />
400
+ <created>1747652535824</created>
401
+ <option name="number" value="00034" />
402
+ <option name="presentableId" value="LOCAL-00034" />
403
+ <option name="project" value="LOCAL" />
404
+ <updated>1747652535824</updated>
405
+ </task>
406
+ <task id="LOCAL-00035" summary="Updates:&#10;&#10;&#10;Command line argument parsing without underscore characters&#10;&#10;Module download renamed to databases&#10;&#10;Implementation of integration of custom databases&#10;&#10;Fixed bug with reading the directories of Lucaprot_HMM and Zayed_HMM&#10;&#10;Update .toml">
407
+ <option name="closed" value="true" />
408
+ <created>1751587387896</created>
409
+ <option name="number" value="00035" />
410
+ <option name="presentableId" value="LOCAL-00035" />
411
+ <option name="project" value="LOCAL" />
412
+ <updated>1751587387896</updated>
413
+ </task>
414
+ <task id="LOCAL-00036" summary="Polish README.md">
415
+ <option name="closed" value="true" />
416
+ <created>1751587669980</created>
417
+ <option name="number" value="00036" />
418
+ <option name="presentableId" value="LOCAL-00036" />
419
+ <option name="project" value="LOCAL" />
420
+ <updated>1751587669980</updated>
421
+ </task>
422
+ <option name="localTasksCounter" value="37" />
367
423
  <servers />
368
424
  </component>
369
425
  <component name="TypeScriptGeneratedFilesManager">
@@ -381,11 +437,6 @@
381
437
  </option>
382
438
  </component>
383
439
  <component name="VcsManagerConfiguration">
384
- <MESSAGE value="Commit: Plots and result summary" />
385
- <MESSAGE value="Commit: File name change" />
386
- <MESSAGE value="Commit: Upload script and results" />
387
- <MESSAGE value="Upload Jupyter notebooks and their respective documentation" />
388
- <MESSAGE value="Upload script progress" />
389
440
  <MESSAGE value="Script progress:&#10;GUI implementation with tkinter&#10;Summarised ColabScan output added&#10;Improvement of command line arguments&#10;General fixes" />
390
441
  <MESSAGE value="Script progress:&#10;Reimplementation of argument parsing to support 3 modes: Scan, Download and GUI&#10;Implementation of runtime calculation&#10;Rename seqkit.py to run_seqkit.py" />
391
442
  <MESSAGE value="Script progress:&#10;Introduce test for Fasta seq length to avoid crushing pyHMMER&#10;Add RdRp coordinates to output and Fasta file (only for aminoacid sequences)&#10;Correction of scores calculation for coverage &#10;Fix scaling of e-value plot (converted to log-scale)&#10;Introduce saving intermidiate files to tmp directory" />
@@ -406,6 +457,11 @@
406
457
  <MESSAGE value="Updates:&#10;Optimize fasta writer from O(n*m) to O(n+m)" />
407
458
  <MESSAGE value="Updates:&#10;Polishing ReadME&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database" />
408
459
  <MESSAGE value="Updates:&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
409
- <option name="LAST_COMMIT_MESSAGE" value="Updates:&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
460
+ <MESSAGE value="Updates:&#10;Add -overwrite as a flag&#10;Add informative progress statements in cli" />
461
+ <MESSAGE value="Updates:&#10;add support for Zayed_HMM database.&#10;change name of Lucaprot db to Lucaprot_HMM" />
462
+ <MESSAGE value="Updates:&#10;Update .toml" />
463
+ <MESSAGE value="Updates:&#10;&#10;&#10;Command line argument parsing without underscore characters&#10;&#10;Module download renamed to databases&#10;&#10;Implementation of integration of custom databases&#10;&#10;Fixed bug with reading the directories of Lucaprot_HMM and Zayed_HMM&#10;&#10;Update .toml" />
464
+ <MESSAGE value="Polish README.md" />
465
+ <option name="LAST_COMMIT_MESSAGE" value="Polish README.md" />
410
466
  </component>
411
467
  </project>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdrpcatch
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Dynamic: Summary
5
5
  Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
6
6
  Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
@@ -34,18 +34,31 @@ library to perform pHMM searches. In addition, the tool scans each sequence (aa
34
34
  In addition, RdRpCATCH provides information about the number of profiles
35
35
  that were positive for each sequence across all pHMM databases, and taxonomic information based on the MMseqs2 easy-taxonomy and search modules against a custom RefSeq Riboviria database.
36
36
 
37
+ ### Version 0.0.7 -> 0.0.8 Changelog
38
+ - Added support for custom pHMM databases. See the [Setting up custom pHMM databases](#setting-up-custom-phmm-databases) section for more information.
39
+ - All specified flags use '-' instead of '_' (e.g. `--db-dir` instead of `--db_dir`).
40
+ - Fixed issue with specifying the Lucaprot_HMM and Zayed_HMM databases in the `--db-options` argument.
41
+ - Command `rdrpcatch download` renamed as `rdrpcatch databases` for clarity, as it now supports adding custom pHMM
42
+ databases to the RdRpCATCH databases. This is facilitated by the `--add-custom-db` argument.
43
+ - Added a `none` option to the `--db-options` argument to search only against custom databases.
44
+
45
+
46
+
37
47
  **The tool has been modified to use [rolypoly](https://code.jgi.doe.gov/UNeri/rolypoly) code/approaches**
38
48
 
39
- ![rdrpcatch_flowchart_v0.png](images%2Frdrpcatch_flowchart_v0.png)
49
+
50
+
51
+ ![rdrpcatch_illustration.png](images%2Frdrpcatch_illustration.png)
40
52
 
41
53
  ### Supported databases
42
54
  - NeoRdRp <sup>1</sup> : 1182 pHMMs
43
55
  - NeoRdRp2 <sup>2</sup>: 19394 pHMMs
44
56
  - RVMT <sup>3</sup>: 710 pHMMs
45
57
  - RdRp-Scan <sup>4</sup> : 68 pHMMs
46
- - TSA_Oleandrite_fam <sup>5</sup>: 77 pHMMs
47
- - TSA_Oleandrite_gen <sup>6</sup> : 341 pHMMs
48
- - LucaProt_pHMM<sup>7 </sup> : 754 pHMMs
58
+ - TSA_Olendraite_fam <sup>5</sup>: 77 pHMMs
59
+ - TSA_Olendraite_gen <sup>6</sup> : 341 pHMMs
60
+ - LucaProt_HMM<sup>7 </sup> : 754 pHMMs
61
+ - Zayed_HMM<sup>8 </sup> : 2489 pHMMs
49
62
 
50
63
  1. Sakaguchi, S. et al. (2022) 'NeoRdRp: A comprehensive dataset for identifying RNA-dependent RNA polymerases of various RNA viruses from metatranscriptomic data', *Microbes and Environments*, 37(3). [doi:10.1264/jsme2.me22001](https://doi.org/10.1264/jsme2.me22001)
51
64
  2. Sakaguchi, S., Nakano, T. and Nakagawa, S. (2024) 'Neordrp2 with improved seed data, annotations, and scoring', *Frontiers in Virology*, 4. [doi:10.3389/fviro.2024.1378695](https://doi.org/10.3389/fviro.2024.1378695)
@@ -53,7 +66,9 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
53
66
  4. Charon, J. et al. (2022) 'RDRP-Scan: A bioinformatic resource to identify and annotate divergent RNA viruses in metagenomic sequence data', *Virus Evolution*, 8(2). [doi:10.1093/ve/veac082](https://doi.org/10.1093/ve/veac082)
54
67
  5. Olendraite, I., Brown, K. and Firth, A.E. (2023) 'Identification of RNA virus–derived rdrp sequences in publicly available transcriptomic data sets', *Molecular Biology and Evolution*, 40(4). [doi:10.1093/molbev/msad060](https://doi.org/10.1093/molbev/msad060)
55
68
  6. Olendraite, I. (2021) 'Mining diverse and novel RNA viruses in transcriptomic datasets', Apollo. Available at: [https://www.repository.cam.ac.uk/items/1fabebd2-429b-45c9-b6eb-41d27d0a90c2](https://www.repository.cam.ac.uk/items/1fabebd2-429b-45c9-b6eb-41d27d0a90c2)
56
- 7. Hou, X. et al. (2024) 'Using artificial intelligence to document the hidden RNA virosphere', *Cell*, 187(24). [doi:10.1016/j.cell.2024.09.027](https://doi.org/10.1016/j.cell.2024.09.027)
69
+ 7. Hou, X. and He, Y. et al. (2024) 'Using artificial intelligence to document the hidden RNA virosphere', *Cell*, 187(24). [doi:10.1016/j.cell.2024.09.027](https://doi.org/10.1016/j.cell.2024.09.027)
70
+ 8. Zayed, A. A., et al. (2022) 'Cryptic and abundant marine viruses at the evolutionary origins of Earth’s RNA virome.' *Science*, 376(6589), 156–162. [doi:10.1126/science.abm5847](https://doi.org/10.1126/science.abm5847)
71
+
57
72
 
58
73
 
59
74
  ## Installation
@@ -94,13 +109,16 @@ Activate the environment and download the RdRpCATCH databases:
94
109
 
95
110
  ```bash
96
111
  conda activate rdrpcatch
97
- rdrpcatch download --destination_dir path/to/store/databases
112
+ rdrpcatch databases --destination-dir path/to/store/databases
98
113
  ```
99
114
 
100
115
  * Note 1: The databases are large files and may take some time to download (~ 3 GB).
101
116
  * Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
102
117
  * Note 3: If you encounter an SSL error while downloading, please try again. The error seems to appear sporadically during testing, and a simple re-initiation of the downloading process seems to fix it.
103
118
 
119
+
120
+
121
+
104
122
  ## Usage
105
123
  RdRpCATCH can be used as a CLI tool as follows:
106
124
 
@@ -109,20 +127,58 @@ RdRpCATCH can be used as a CLI tool as follows:
109
127
  # conda activate rdrpcatch
110
128
 
111
129
  # scan the input fasta file with the selected databases
112
- rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db_dir path/to/database
130
+ rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database
113
131
  ```
114
- ### input:
132
+
133
+ ## Input description
115
134
  The input file can be one or more nucleotide or protein sequences in multi-fasta format.
116
135
  The output directory is where the results will be stored. We recommend specifying the type of the sequence in the command line.
117
136
  An optional argument `--seq-type` (nuc or prot) can be used to specify if the input fasta file sequences are nucleotide or amino acid.
118
137
 
138
+
139
+ ## Setting up custom pHMM databases
140
+ It is possible to use custom pHMM databases with RdRpCATCH. As a prerequisite, you need to install the RdRpCATCH
141
+ databases using the `rdrpcatch databases` command as described above, to a directory of your choice.
142
+
143
+ The custom databases should be formatted as follows:
144
+
145
+ - First create a directory and give it a descriptive name, e.g. `my_custom_rdrp_database`. Important: The name should not contain comma `,` characters.
146
+ - Inside the directory put your custom pHMM HMMER pressed database. You can use the `hmmpress` command of HMMER to create the pressed database from your custom HMM file. This creates a set of files with the same name as the original HMM file, but with different extensions (e.g. `.h3f`, `.h3i`, `.h3m`, `.h3p`). The directory should contain all these files. Please refer to the HMMER manual for more information on how to create a pressed database from an HMM file. (http://eddylab.org/software/hmmer/Userguide.pdf)
147
+ - Next, you can add the directory to the custom databases that are readable by RdRpCATCH. This can be done by using the `rdrpcatch databases` command as follows:
148
+
149
+ ```bash
150
+ rdrpcatch databases --add-custom-db path/to/my_custom_rdrp_database --destination-dir path/that/contains/rdrpcatch/databases
151
+ ```
152
+
153
+ - This will add the custom database to the list of databases that can be used with RdRpCATCH.
154
+ - The custom database can then be used with the `rdrpcatch scan` command by specifying the `--custom-dbs` argument as follows:
155
+
156
+ ```bash
157
+ rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database --custom-dbs custom_database_name
158
+ ```
159
+
160
+ - The `custom_database_name` should be the name of the directory that contains the custom pHMM files, without the path.
161
+ - For example, if the custom database is stored in `path/to/my_custom_rdrp_database`, you would use `--custom-dbs my_custom_rdrp_database` in the command line.
162
+ - You can add multiple custom databases by installing them in the same way and specifying them by separating them with commas, e.g. `--custom-dbs my_custom_rdrp_database,another_custom_database`.
163
+ - The custom databases can be used in combination with the pre-compiled databases provided by RdRpCATCH. To do this, you can specify the `--db-options` argument with the names of the pre-compiled databases you want to use, and specify the custom databases with the `--custom-dbs` argument.
164
+ - For example, if you want to use the NeoRdRp and RVMT databases along with your custom database, you would use the following command:
165
+
166
+ ```bash
167
+ rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database --db-options NeoRdRp,RVMT --custom-dbs my_custom_rdrp_database
168
+ ```
169
+
170
+ - Note: By default, RdRpCATCH will search against all pre-compiled databases if no `--db-options` argument is specified. If you want to use only the custom databases, you can specify `--db-options none` to avoid searching against the pre-compiled databases.
171
+
172
+
173
+
174
+
119
175
  ## Commands
120
176
  The following two commands are available in RdRpCATCH:
121
177
  * [`rdrpcatch scan`](#rdrpcatch-scan)
122
- * [`rdrpcatch download`](#rdrpcatch-download)
178
+ * [`rdrpcatch databases`](#rdrpcatch-databases)
123
179
 
124
- ### rdrpcatch download:
125
- Command to download pre-compiled databases from Zenodo. If the databases are already downloaded in the specified directory
180
+ ### rdrpcatch databases:
181
+ Command to download pre-compiled databases from Zenodo and to set up custom databases. If the databases are already downloaded in the specified directory
126
182
  , the command will check for updates and download the latest version if available.
127
183
 
128
184
  | Argument | Short Flag | Type | Description |
@@ -130,28 +186,30 @@ Command to download pre-compiled databases from Zenodo. If the databases are alr
130
186
  | `--destination-dir` | `-dest` | PATH | Path to the directory to download HMM databases. [required] |
131
187
  | `--concept-doi` | `` | TEXT | Zenodo Concept DOI for database repository |
132
188
  | `--help` | `` | | Show help message and exit |
189
+ | `--add-custom-db` | `` | PATH | Path to the directory containing custom pHMM files to add to the RdRpCATCH databases. |
190
+
133
191
  ### rdrpcatch scan:
134
192
  Search a given input using selected RdRp databases.
135
193
 
136
- | Argument | Short Flag | Type | Description |
137
- |----------|------------|------|-------------|
138
- | `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
139
- | `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
140
- | `--db_dir` | `-db_dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
141
- | `--db_options` | `-dbs` | TEXT | Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot, all |
142
- | `--custom-dbs` | | PATH | Path to directory containing custom MSAs/pHMM files to use as additional databases |
143
- | `--seq_type` | `-seq_type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
144
- | `--verbose` | `-v` | FLAG | Print verbose output. |
145
- | `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
146
- | `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
147
- | `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
148
- | `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
149
- | `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
150
- | `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
151
- | `--length_thr` | `-length_thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
152
- | `--gen_code` | `-gen_code` | INTEGER | Genetic code to use for translation. (default: 1) |
153
- | `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
154
- | `--keep_tmp` | `-keep_tmp` | | Keep the temporary files generated during the analysis. (default: False) |
194
+ | Argument | Short Flag | Type | Description |
195
+ |------------------|---------------|------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
196
+ | `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
197
+ | `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
198
+ | `--db-dir` | `-db-dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
199
+ | `--db-options` | `-dbs` | TEXT | Comma-separated list of pre-installed databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot_HMM, Zayed_HMM, all, none |
200
+ | `--custom-dbs` | | PATH | Comma-separated list of custom databases to search against. Valid options: names of the directories that the custom databases are stored in. |
201
+ | `--seq-type` | `-seq-type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
202
+ | `--verbose` | `-v` | FLAG | Print verbose output. |
203
+ | `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
204
+ | `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
205
+ | `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
206
+ | `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
207
+ | `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
208
+ | `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
209
+ | `--length-thr` | `-length-thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
210
+ | `--gen-code` | `-gen-code` | INTEGER | Genetic code to use for translation. (default: 1) |
211
+ | `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
212
+ | `--keep-tmp` | `-keep-tmp` | | Keep the temporary files generated during the analysis. (default: False) |
155
213
 
156
214
 
157
215
 
@@ -12,18 +12,31 @@ library to perform pHMM searches. In addition, the tool scans each sequence (aa
12
12
  In addition, RdRpCATCH provides information about the number of profiles
13
13
  that were positive for each sequence across all pHMM databases, and taxonomic information based on the MMseqs2 easy-taxonomy and search modules against a custom RefSeq Riboviria database.
14
14
 
15
+ ### Version 0.0.7 -> 0.0.8 Changelog
16
+ - Added support for custom pHMM databases. See the [Setting up custom pHMM databases](#setting-up-custom-phmm-databases) section for more information.
17
+ - All specified flags use '-' instead of '_' (e.g. `--db-dir` instead of `--db_dir`).
18
+ - Fixed issue with specifying the Lucaprot_HMM and Zayed_HMM databases in the `--db-options` argument.
19
+ - The `rdrpcatch download` command has been renamed to `rdrpcatch databases` for clarity, as it now also supports adding custom pHMM
20
+ databases to the RdRpCATCH databases. This is facilitated by the `--add-custom-db` argument.
21
+ - Added a `none` option to the `--db-options` argument to search only against custom databases.
22
+
23
+
24
+
15
25
  ** The tool has been modified to use [rolypoly](https://code.jgi.doe.gov/UNeri/rolypoly) code/approaches **
16
26
 
17
- ![rdrpcatch_flowchart_v0.png](images%2Frdrpcatch_flowchart_v0.png)
27
+
28
+
29
+ ![rdrpcatch_illustration.png](images%2Frdrpcatch_illustration.png)
18
30
 
19
31
  ### Supported databases
20
32
  - NeoRdRp <sup>1</sup> : 1182 pHMMs
21
33
  - NeoRdRp2 <sup>2</sup>: 19394 pHMMs
22
34
  - RVMT <sup>3</sup>: 710 pHMMs
23
35
  - RdRp-Scan <sup>4</sup> : 68 pHMMs
24
- - TSA_Oleandrite_fam <sup>5</sup>: 77 pHMMs
25
- - TSA_Oleandrite_gen <sup>6</sup> : 341 pHMMs
26
- - LucaProt_pHMM<sup>7 </sup> : 754 pHMMs
36
+ - TSA_Olendraite_fam <sup>5</sup>: 77 pHMMs
37
+ - TSA_Olendraite_gen <sup>6</sup> : 341 pHMMs
38
+ - LucaProt_HMM<sup>7 </sup> : 754 pHMMs
39
+ - Zayed_HMM<sup>8 </sup> : 2489 pHMMs
27
40
 
28
41
  1. Sakaguchi, S. et al. (2022) 'NeoRdRp: A comprehensive dataset for identifying RNA-dependent RNA polymerases of various RNA viruses from metatranscriptomic data', *Microbes and Environments*, 37(3). [doi:10.1264/jsme2.me22001](https://doi.org/10.1264/jsme2.me22001)
29
42
  2. Sakaguchi, S., Nakano, T. and Nakagawa, S. (2024) 'Neordrp2 with improved seed data, annotations, and scoring', *Frontiers in Virology*, 4. [doi:10.3389/fviro.2024.1378695](https://doi.org/10.3389/fviro.2024.1378695)
@@ -31,7 +44,9 @@ that were positive for each sequence across all pHMM databases, and taxonomic in
31
44
  4. Charon, J. et al. (2022) 'RDRP-Scan: A bioinformatic resource to identify and annotate divergent RNA viruses in metagenomic sequence data', *Virus Evolution*, 8(2). [doi:10.1093/ve/veac082](https://doi.org/10.1093/ve/veac082)
32
45
  5. Olendraite, I., Brown, K. and Firth, A.E. (2023) 'Identification of RNA virus–derived rdrp sequences in publicly available transcriptomic data sets', *Molecular Biology and Evolution*, 40(4). [doi:10.1093/molbev/msad060](https://doi.org/10.1093/molbev/msad060)
33
46
  6. Olendraite, I. (2021) 'Mining diverse and novel RNA viruses in transcriptomic datasets', Apollo. Available at: [https://www.repository.cam.ac.uk/items/1fabebd2-429b-45c9-b6eb-41d27d0a90c2](https://www.repository.cam.ac.uk/items/1fabebd2-429b-45c9-b6eb-41d27d0a90c2)
34
- 7. Hou, X. et al. (2024) 'Using artificial intelligence to document the hidden RNA virosphere', *Cell*, 187(24). [doi:10.1016/j.cell.2024.09.027](https://doi.org/10.1016/j.cell.2024.09.027)
47
+ 7. Hou, X. and He, Y. et al. (2024) 'Using artificial intelligence to document the hidden RNA virosphere', *Cell*, 187(24). [doi:10.1016/j.cell.2024.09.027](https://doi.org/10.1016/j.cell.2024.09.027)
48
+ 8. Zayed, A. A., et al. (2022) 'Cryptic and abundant marine viruses at the evolutionary origins of Earth’s RNA virome.' *Science*, 376(6589), 156–162. [doi:10.1126/science.abm5847](https://doi.org/10.1126/science.abm5847)
49
+
35
50
 
36
51
 
37
52
  ## Installation
@@ -72,13 +87,16 @@ Activate the environment and download the RdRpCATCH databases:
72
87
 
73
88
  ```bash
74
89
  conda activate rdrpcatch
75
- rdrpcatch download --destination_dir path/to/store/databases
90
+ rdrpcatch databases --destination-dir path/to/store/databases
76
91
  ```
77
92
 
78
93
  * Note 1: The databases are large files and may take some time to download (~ 3 GB).
79
94
  * Note 2: The databases are stored in the specified directory, and the path is required to run RdRpCATCH.
80
95
  * Note 3: If you encounter an SSL error while downloading, please try again. The error seems to appear sporadically during testing, and a simple re-initiation of the downloading process seems to fix it.
81
96
 
97
+
98
+
99
+
82
100
  ## Usage
83
101
  RdRpCATCH can be used as a CLI tool as follows:
84
102
 
@@ -87,20 +105,58 @@ RdRpCATCH can be used as a CLI tool as follows:
87
105
  # conda activate rdrpcatch
88
106
 
89
107
  # scan the input fasta file with the selected databases
90
- rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db_dir path/to/database
108
+ rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database
91
109
  ```
92
- ### input:
110
+
111
+ ## Input description
93
112
  The input file can be one or more nucleotide or protein sequences in multi-fasta format.
94
113
  The output directory is where the results will be stored. We recommend specifying the type of the sequence in the command line.
95
114
  An optional argument `--seq_type` (nuc or prot) can be used to specify if the input fasta file sequences are nucleotide or amino acid.
96
115
 
116
+
117
+ ## Setting up custom pHMM databases
118
+ It is possible to use custom pHMM databases with RdRpCATCH. As a prerequisite, you need to install the RdRpCATCH
119
+ databases using the `rdrpcatch databases` command as described above, to a directory of your choice.
120
+
121
+ The custom databases should be formatted as follows:
122
+
123
+ - First create a directory and give it a descriptive name, e.g. `my_custom_rdrp_database`. Important: The name should not contain comma `,` characters.
124
+ - Inside the directory put your custom pHMM HMMER pressed database. You can use the `hmmpress` command of HMMER to create the pressed database from your custom HMM file. This creates a set of files with the same name as the original HMM file, but with different extensions (e.g. `.h3f`, `.h3i`, `.h3m`, `.h3p`). The directory should contain all these files. Please refer to the HMMER manual for more information on how to create a pressed database from an HMM file. (http://eddylab.org/software/hmmer/Userguide.pdf)
125
+ - Next you can add the directory to the custom databases that are readable by RdRpCATCH. This can be done by using the rdrpcatch databases command as follows:
126
+
127
+ ```bash
128
+ rdrpcatch databases --add-custom-db path/to/my_custom_rdrp_database --destination-dir path/that/contains/rdrpcatch/databases
129
+ ```
130
+
131
+ - This will add the custom database to the list of databases that can be used with RdRpCATCH.
132
+ - The custom database can then be used with the `rdrpcatch scan` command by specifying the `--custom-dbs` argument as follows:
133
+
134
+ ```bash
135
+ rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database --custom-dbs custom_database_name
136
+ ```
137
+
138
+ - The `custom_database_name` should be the name of the directory that contains the custom pHMM files, without the path.
139
+ - For example, if the custom database is stored in `path/to/my_custom_rdrp_database`, you would use `--custom-dbs my_custom_rdrp_database` in the command line.
140
+ - You can add multiple custom databases by installing them in the same way and specifying them by separating them with commas, e.g. `--custom-dbs my_custom_rdrp_database,another_custom_database`.
141
+ - The custom databases can be used in combination with the pre-compiled databases provided by RdRpCATCH. To do this, you can specify the `--db-options` argument with the names of the pre-compiled databases you want to use, and specify the custom databases with the `--custom-dbs` argument.
142
+ - For example, if you want to use the NeoRdRp and RVMT databases along with your custom database, you would use the following command:
143
+
144
+ ```bash
145
+ rdrpcatch scan -i path/to/input.fasta -o path/to/output_dir -db-dir path/to/database --db-options NeoRdRp,RVMT --custom-dbs my_custom_rdrp_database
146
+ ```
147
+
148
+ - Note: By default, RdRpCATCH will search against all pre-compiled databases if no `--db-options` argument is specified. If you want to use only the custom databases, you can specify `--db-options none` to avoid searching against the pre-compiled databases.
149
+
150
+
151
+
152
+
97
153
  ## Commands
98
154
  The following two commands are available in RdRpCATCH:
99
155
  * [`rdrpcatch scan`](#rdrpcatch-scan)
100
- * [`rdrpcatch download`](#rdrpcatch-download)
156
+ * [`rdrpcatch databases`](#rdrpcatch-databases)
101
157
 
102
- ### rdrpcatch download:
103
- Command to download pre-compiled databases from Zenodo. If the databases are already downloaded in the specified directory
158
+ ### rdrpcatch databases:
159
+ Command to download pre-compiled databases from Zenodo and to set up custom databases. If the databases are already downloaded in the specified directory,
104
160
  the command will check for updates and download the latest version if available.
105
161
 
106
162
  | Argument | Short Flag | Type | Description |
@@ -108,28 +164,30 @@ Command to download pre-compiled databases from Zenodo. If the databases are alr
108
164
  | `--destination-dir` | `-dest` | PATH | Path to the directory to download HMM databases. [required] |
109
165
  | `--concept-doi` | `` | TEXT | Zenodo Concept DOI for database repository |
110
166
  | `--help` | `` | | Show help message and exit |
167
+ | `--add-custom-db` | `-cdb` | PATH | Path to the directory containing custom pHMM files to add to the RdRpCATCH databases. |
168
+
111
169
  ### rdrpcatch scan:
112
170
  Search a given input using selected RdRp databases.
113
171
 
114
- | Argument | Short Flag | Type | Description |
115
- |----------|------------|------|-------------|
116
- | `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
117
- | `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
118
- | `--db_dir` | `-db_dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
119
- | `--db_options` | `-dbs` | TEXT | Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot, all |
120
- | `--custom-dbs` | | PATH | Path to directory containing custom MSAs/pHMM files to use as additional databases |
121
- | `--seq_type` | `-seq_type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
122
- | `--verbose` | `-v` | FLAG | Print verbose output. |
123
- | `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
124
- | `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
125
- | `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
126
- | `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
127
- | `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
128
- | `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
129
- | `--length_thr` | `-length_thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
130
- | `--gen_code` | `-gen_code` | INTEGER | Genetic code to use for translation. (default: 1) |
131
- | `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
132
- | `--keep_tmp` | `-keep_tmp` | | Keep the temporary files generated during the analysis. (default: False) |
172
+ | Argument | Short Flag | Type | Description |
173
+ |------------------|---------------|------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
174
+ | `--input` | `-i` | FILE | Path to the input FASTA file. [required] |
175
+ | `--output` | `-o` | DIRECTORY | Path to the output directory. [required] |
176
+ | `--db-dir` | `-db-dir` | PATH | Path to the directory containing RdRpCATCH databases. [required] |
177
+ | `--db-options` | `-dbs` | TEXT | Comma-separated list of pre-installed databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan, Lucaprot_HMM, Zayed_HMM, all, none |
178
+ | `--custom-dbs` | | PATH | Comma-separated list of custom databases to search against. Valid options: names of the directories that the custom databases are stored in. |
179
+ | `--seq-type` | `-seq-type` | TEXT | Type of sequence to search against: (prot,nuc) Default: unknown |
180
+ | `--verbose` | `-v` | FLAG | Print verbose output. |
181
+ | `--evalue` | `-e` | FLOAT | E-value threshold for HMMsearch. (default: 1e-5) |
182
+ | `--incevalue` | `-incE` | FLOAT | Inclusion E-value threshold for HMMsearch. (default: 1e-5) |
183
+ | `--domevalue` | `-domE` | FLOAT | Domain E-value threshold for HMMsearch. (default: 1e-5) |
184
+ | `--incdomevalue` | `-incdomE` | FLOAT | Inclusion domain E-value threshold for HMMsearch. (default: 1e-5) |
185
+ | `--zvalue` | `-z` | INTEGER | Number of sequences to search against. (default: 1000000) |
186
+ | `--cpus` | `-cpus` | INTEGER | Number of CPUs to use for HMMsearch. (default: 1) |
187
+ | `--length-thr` | `-length-thr` | INTEGER | Minimum length threshold for seqkit seq. (default: 400) |
188
+ | `--gen-code` | `-gen-code` | INTEGER | Genetic code to use for translation. (default: 1) |
189
+ | `--bundle` | `-bundle` | | Bundle the output files into a single archive. (default: False) |
190
+ | `--keep-tmp` | `-keep-tmp` | | Keep the temporary files generated during the analysis. (default: False) |
133
191
 
134
192
 
135
193
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rdrpcatch"
7
- version = "0.0.6"
7
+ version = "0.0.8"
8
8
  authors = [
9
9
  {name = "Dimitris Karapliafis", email = "dimitris.karapliafis@wur.nl"},
10
10
  {name = "Uri Neri", email = "uneri@lbl.gov"},
@@ -26,7 +26,7 @@ def parse_comma_separated_options(ctx, param, value):
26
26
  return ['all']
27
27
 
28
28
  allowed_choices = ['RVMT', 'NeoRdRp', 'NeoRdRp.2.1', 'TSA_Olendraite_fam', 'TSA_Olendraite_gen', 'RDRP-scan',
29
- 'Lucaprot', 'all']
29
+ 'Lucaprot_HMM', 'Zayed_HMM', 'all', 'none']
30
30
  lower_choices = [choice.lower() for choice in allowed_choices]
31
31
  options = value.split(',')
32
32
  lower_options = [option.lower() for option in options]
@@ -66,17 +66,16 @@ def cli():
66
66
  @click.option("-o", "--output",
67
67
  help="Path to the output directory.",
68
68
  type=click.Path(exists=False, file_okay=False, writable=True, path_type=Path), required=True)
69
- @click.option("-db_dir", "--db_dir",
69
+ @click.option("-db-dir", "--db-dir",
70
70
  help="Path to the directory containing RdRpCATCH databases.",
71
71
  type=click.Path(exists=True, dir_okay=True, readable=True, path_type=Path),required=True)
72
- @click.option("-dbs", "--db_options",
72
+ @click.option("-dbs", "--db-options",
73
73
  callback=parse_comma_separated_options,
74
74
  default="all",
75
75
  help="Comma-separated list of databases to search against. Valid options: RVMT, NeoRdRp, NeoRdRp.2.1,"
76
- " TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,Lucaprot, all")
76
+ " TSA_Olendraite_fam, TSA_Olendraite_gen, RDRP-scan,Lucaprot_HMM, Zayed_HMM, all, none. ")
77
77
  @click.option("--custom-dbs",
78
- help="Path to directory containing custom MSAs/pHMM files to use as additional databases",
79
- type=click.Path(exists=True, path_type=Path))
78
+ help="Path to directory containing custom MSAs/pHMM files to use as additional databases")
80
79
  @click.option("-seq_type", "--seq_type",
81
80
  type=click.STRING,
82
81
  default=None,
@@ -112,7 +111,7 @@ def cli():
112
111
  type=click.INT,
113
112
  default=400,
114
113
  help="Minimum length threshold for seqkit seq. (default: 400)")
115
- @click.option('-gen_code', '--gen_code',
114
+ @click.option('-gen-code', '--gen_code',
116
115
  type=click.INT,
117
116
  default=1,
118
117
  help='Genetic code to use for translation. (default: 1) Possible genetic codes (supported by seqkit translate) : 1: The Standard Code, '
@@ -143,7 +142,7 @@ def cli():
143
142
  is_flag=True,
144
143
  default=False,
145
144
  help="Bundle the output files into a single archive. (default: False)")
146
- @click.option('-keep_tmp', '--keep_tmp',
145
+ @click.option('-keep-tmp', '--keep_tmp',
147
146
  is_flag=True,
148
147
  default=False,
149
148
  help="Keep temporary files (Expert users) (default: False)")
@@ -164,7 +163,7 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
164
163
 
165
164
  table.add_row("Input File", str(input))
166
165
  table.add_row("Output Directory", str(output))
167
- table.add_row("Databases", ", ".join(db_options))
166
+ table.add_row("Supported databases", ", ".join(db_options))
168
167
  table.add_row("Database Directory", str(db_dir))
169
168
  if custom_dbs:
170
169
  table.add_row("Custom Databases", str(custom_dbs))
@@ -184,24 +183,13 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
184
183
 
185
184
  console.print(Panel(table, title="Scan Configuration"))
186
185
 
187
- # Add custom databases if provided
188
- if custom_dbs:
189
- db = db_fetcher(db_dir)
190
- if os.path.isfile(custom_dbs):
191
- db.add_custom_db(custom_dbs)
192
- else:
193
- for item in os.listdir(custom_dbs):
194
- item_path = os.path.join(custom_dbs, item)
195
- if os.path.isfile(item_path) and item_path.endswith(('.hmm', '.h3m', '.msa', '.sto', '.fasta', '.fa')):
196
- db.add_custom_db(item_path)
197
- elif os.path.isdir(item_path):
198
- db.add_custom_db(item_path, item)
199
186
 
200
187
  run_scan(
201
188
  input_file=input,
202
189
  output_dir=output,
203
190
  db_options=db_options,
204
191
  db_dir=db_dir,
192
+ custom_dbs=custom_dbs,
205
193
  seq_type=seq_type,
206
194
  verbose=verbose,
207
195
  e=evalue,
@@ -217,29 +205,6 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
217
205
  overwrite=overwrite
218
206
  )
219
207
 
220
- # @cli.command("download", help="Download RdRpCATCH databases.")
221
- # @click.option("--destination_dir", "-dest",
222
- # help="Path to the directory to download HMM databases.",
223
- # type=click.Path(exists=False, file_okay=False, writable=True, path_type=Path), required=True)
224
- # @click.option("--check-updates", "-u",
225
- # is_flag=True,
226
- # help="Check for database updates")
227
- # @click.pass_context
228
- # def download(ctx, destination_dir, check_updates):
229
- # """Download RdRpCATCH databases."""
230
- #
231
- # # if check_updates:
232
- # # db = db_fetcher(destination_dir)
233
- # # version_info = db.check_db_updates()
234
- # # if version_info:
235
- # # console.print("Current database versions:")
236
- # # for db_name, info in version_info.items():
237
- # # console.print(f"- {db_name}: {info}")
238
- # # else:
239
- # # console.print("No version information available")
240
- # # return
241
- #
242
- # run_download(destination_dir)
243
208
  #
244
209
  # # @cli.command("gui", help="Launch the GUI.")
245
210
  # # @click.pass_context
@@ -251,17 +216,41 @@ def scan(ctx, input, output, db_options, db_dir, custom_dbs, seq_type, verbose,
251
216
 
252
217
 
253
218
 
254
- @cli.command("download", help="Download & update RdRpCATCH databases. If databases are already installed in the "
219
+ @cli.command("databases", help="Download & update RdRpCATCH databases. If databases are already installed in the "
255
220
  "specified directory,"
256
221
  " it will check for updates and download the latest version if available.")
257
- @click.option("--destination_dir", "-dest",
222
+ @click.option("--destination-dir", "-dest",
258
223
  help="Path to directory to download databases",
259
224
  type=click.Path(path_type=Path, file_okay=False, writable=True),
260
225
  required=True)
261
226
  @click.option("--concept-doi", default="10.5281/zenodo.14358348",
262
227
  help="Zenodo Concept DOI for database repository")
263
- def download(destination_dir: Path, concept_doi: str):
228
+ @click.option("--add-custom-db", "-cdb",
229
+ help="Path to a custom, pressed pHMM database directory. This only works"
230
+ "if the supported databases have already been downloaded."
231
+ " Please point to the directory the databases are stored ('rdrpcatch_dbs') using"
232
+ "the '--destination-dir' flag."
233
+
234
+ , type = click.Path(exists=True, dir_okay=True, readable=True, path_type=Path))
235
+
236
+ def databases(destination_dir: Path, concept_doi: str, add_custom_db: Path | None = None):
264
237
  """Handle database download/update workflow"""
238
+
239
+ if add_custom_db:
240
+ if not destination_dir.exists():
241
+ console.print("[red]× Destination directory does not exist![/red]")
242
+ raise click.Abort()
243
+ if not destination_dir.is_dir():
244
+ console.print("[red]× Destination path is not a directory![/red]")
245
+ raise click.Abort()
246
+
247
+ db = db_fetcher(destination_dir)
248
+ db.add_custom_db(add_custom_db)
249
+
250
+ console.print(f"[green]✓ Custom database added successfully to {destination_dir}[/green]")
251
+
252
+
253
+
265
254
  downloader = ZenodoDownloader(concept_doi, destination_dir)
266
255
 
267
256
  try:
@@ -49,20 +49,23 @@ class db_fetcher:
49
49
 
50
50
  # Copy the database file
51
51
  if os.path.isfile(db_path):
52
- shutil.copy2(db_path, target_path)
52
+ raise ValueError("Custom database must be a directory, not a file. Name the directory as the database name,"
53
+ "and include a pressed HMMER HMM database inside. For directions, see the README.md file.")
54
+
53
55
  elif os.path.isdir(db_path):
54
56
  if os.path.exists(target_path):
55
57
  shutil.rmtree(target_path)
56
58
  shutil.copytree(db_path, target_path)
57
59
 
58
- # Update version info
59
- version_info = self._get_db_version()
60
- version_info.setdefault('custom_dbs', {})
61
- version_info['custom_dbs'][db_name] = {
62
- 'added': datetime.datetime.now().isoformat(),
63
- 'path': target_path
64
- }
65
- self._save_db_version(version_info)
60
+ # For now, we stop saving the version info, cause it messes up with the download module.
61
+ # # Update version info
62
+ # version_info = self._get_db_version()
63
+ # version_info.setdefault('custom_dbs', {})
64
+ # version_info['custom_dbs'][db_name] = {
65
+ # 'added': datetime.datetime.now().isoformat(),
66
+ # 'path': target_path
67
+ # }
68
+ # self._save_db_version(version_info)
66
69
 
67
70
  def _resolve_rdrpcatch_path(self):
68
71
  """Automatically detect correct database path structure"""
@@ -93,6 +96,7 @@ class db_fetcher:
93
96
 
94
97
  # First check custom databases
95
98
  if os.path.exists(self.custom_db_dir):
99
+
96
100
  custom_path = os.path.join(self.custom_db_dir, db_name)
97
101
  if os.path.exists(custom_path):
98
102
  if os.path.isfile(custom_path) and custom_path.endswith(('.h3m', '.hmm')):
@@ -53,7 +53,7 @@ def bundle_results(output_dir, prefix):
53
53
 
54
54
  return archive_path
55
55
 
56
- def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,incdomE,domE,incE,z, cpus, length_thr, gen_code, bundle, keep_tmp, overwrite):
56
+ def run_scan(input_file, output_dir, db_options, db_dir, custom_dbs, seq_type, verbose, e,incdomE,domE,incE,z, cpus, length_thr, gen_code, bundle, keep_tmp, overwrite):
57
57
  """
58
58
  Run RdRpCATCH scan.
59
59
 
@@ -127,7 +127,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
127
127
 
128
128
  logger.silent_log(f"Input File: {input_file}")
129
129
  logger.silent_log(f"Output Directory: {output_dir}")
130
- logger.silent_log(f"Databases: {db_options}")
130
+ logger.silent_log(f"Supported Databases: {db_options}")
131
+ logger.silent_log(f"Custom Databases: {custom_dbs}")
131
132
  logger.silent_log(f"Database Directory: {db_dir}")
132
133
  logger.silent_log(f"Sequence Type: {seq_type}")
133
134
  logger.silent_log(f"Verbose Mode: {'ON' if verbose else 'OFF'}")
@@ -170,7 +171,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
170
171
 
171
172
  logger.loud_log("Fetching HMM databases...")
172
173
 
173
- ## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot
174
+ ## Fetch HMM databases- RVMT, NeoRdRp, NeoRdRp.2.1, TSA_Olendraite, RDRP-scan, Lucaprot_HMM,Zayed_HMM
174
175
  rvmt_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("RVMT")
175
176
  if verbose:
176
177
  logger.loud_log(f"RVMT HMM database fetched from: {rvmt_hmm_db}")
@@ -202,20 +203,32 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
202
203
  logger.loud_log(f"RDRP-scan HMM database fetched from: {rdrpscan_hmm_db}")
203
204
  else:
204
205
  logger.silent_log(f"RDRP-scan HMM database fetched from: {rdrpscan_hmm_db}")
205
- lucaprot_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Lucaprot")
206
+ lucaprot_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Lucaprot_HMM")
206
207
  if verbose:
207
208
  logger.loud_log(f"Lucaprot HMM database fetched from: {lucaprot_hmm_db}")
208
209
  else:
209
210
  logger.silent_log(f"Lucaprot HMM database fetched from: {lucaprot_hmm_db}")
211
+ zayed_hmm_db = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path("Zayed_HMM")
212
+ if verbose:
213
+ logger.loud_log(f"Zayed HMM database fetched from: {zayed_hmm_db}")
214
+ else:
215
+ logger.silent_log(f"Zayed HMM database fetched from: {zayed_hmm_db}")
210
216
 
211
217
  db_name_list = []
212
218
  db_path_list = []
213
219
 
214
220
  ## Set up HMM databases
215
221
  if db_options == ['all']:
216
- db_name_list = ["RVMT", "NeoRdRp", "NeoRdRp.2.1", "TSA_Olendraite_fam","TSA_Olendraite_gen", "RDRP-scan", "Lucaprot"]
217
- db_path_list = [rvmt_hmm_db, neordrp_hmm_db, neordrp_2_hmm_db, tsa_olen_fam_hmm_db,tsa_olen_gen_hmm_db, rdrpscan_hmm_db, lucaprot_hmm_db]
218
-
222
+ db_name_list = ["RVMT", "NeoRdRp", "NeoRdRp.2.1", "TSA_Olendraite_fam","TSA_Olendraite_gen", "RDRP-scan", "Lucaprot_HMM", "Zayed_HMM"]
223
+ db_path_list = [rvmt_hmm_db, neordrp_hmm_db, neordrp_2_hmm_db, tsa_olen_fam_hmm_db,tsa_olen_gen_hmm_db, rdrpscan_hmm_db, lucaprot_hmm_db, zayed_hmm_db]
224
+ elif db_options == ['none'] and not custom_dbs:
225
+ raise Exception("No databases selected. Please select at least one database or provide custom databases.")
226
+ elif db_options == ['none'] and custom_dbs:
227
+ logger.loud_log("No supported databases selected, but custom databases provided. Using only custom databases.")
228
+ if not os.path.exists(os.path.join(db_dir, "custom_dbs")):
229
+ raise Exception(f"Custom databases directory not found: {os.path.join(db_dir, 'custom_dbs')}. Please"
230
+ f" use rdrpcatch databases to create a valid custom database as described in the "
231
+ f"documentation.")
219
232
  else:
220
233
  for db in db_options:
221
234
  if db == "RVMT".lower():
@@ -236,15 +249,42 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
236
249
  elif db == "RDRP-scan".lower():
237
250
  db_name_list.append("RDRP-scan")
238
251
  db_path_list.append(rdrpscan_hmm_db)
239
- elif db == "Lucaprot".lower():
240
- db_name_list.append("Lucaprot")
252
+ elif db == "Lucaprot_HMM".lower():
253
+ db_name_list.append("Lucaprot_HMM")
241
254
  db_path_list.append(lucaprot_hmm_db)
255
+ elif db == "Zayed_HMM".lower():
256
+ db_name_list.append("Zayed_HMM")
257
+ db_path_list.append(zayed_hmm_db)
242
258
  else:
243
259
  raise Exception(f"Invalid database option: {db}")
244
260
 
245
- # Fetch mmseqs database
261
+ ## Check if custom databases are provided
262
+ if custom_dbs:
246
263
 
264
+ if not os.path.exists(os.path.join(db_dir, "custom_dbs")):
265
+ raise Exception(f"Custom databases directory not found: {os.path.join(db_dir, 'custom_dbs')}. Please"
266
+ f" use rdrpcatch databases to create a valid custom database as described in the "
267
+ f"documentation.")
268
+
269
+ custom_db_names = custom_dbs.split(',')
270
+ for custom_db in custom_db_names:
271
+ if verbose:
272
+ logger.loud_log(f"Fetching custom database: {custom_db}")
273
+ else:
274
+ logger.silent_log(f"Fetching custom database: {custom_db}")
247
275
 
276
+ custom_db = custom_db.strip()
277
+ custom_db_path = fetch_dbs.db_fetcher(db_dir).fetch_hmm_db_path(custom_db)
278
+ db_name_list.append(custom_db)
279
+ db_path_list.append(custom_db_path)
280
+
281
+ if verbose:
282
+ logger.loud_log(f"Custom database {custom_db} fetched from: {custom_db_path}")
283
+ else:
284
+ logger.silent_log(f"Custom database {custom_db} fetched from: {custom_db_path}")
285
+
286
+
287
+ # Fetch mmseqs database
248
288
  logger.loud_log("Fetching Mmseqs2 databases...")
249
289
 
250
290
  mmseqs_db_path = fetch_dbs.db_fetcher(db_dir).fetch_mmseqs_db_path("mmseqs_refseq_riboviria_20250211")
@@ -382,7 +422,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
382
422
 
383
423
  # Check if the combined dataframe is empty
384
424
  if combined_df.is_empty():
385
- logger.loud_log("No hits found by RdRpCATCH. Exiting.")
425
+ db_name_string = ', '.join(db_name_list)
426
+ logger.loud_log(f"No hits found by RdRpCATCH for databases {db_name_string}. Exiting.")
386
427
  return None
387
428
 
388
429
  # Generate upset plot
@@ -549,7 +590,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
549
590
 
550
591
  # Check if the combined dataframe is empty
551
592
  if combined_df.is_empty():
552
- logger.loud_log("No hits found by RdRpCATCH. Exiting.")
593
+ db_name_string = ', '.join(db_name_list)
594
+ logger.loud_log(f"No hits found by RdRpCATCH for databases {db_name_string}. Exiting.")
553
595
  return None
554
596
 
555
597
  # Generate upset plot
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes