rdrpcatch 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/workspace.xml +47 -21
  2. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/PKG-INFO +2 -2
  3. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/README.md +1 -1
  4. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/meta.yaml +14 -14
  5. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/pyproject.toml +1 -1
  6. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py +1 -1
  7. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/utils.py +30 -15
  8. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_wrapper.py +7 -8
  9. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.gitignore +0 -0
  10. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/.gitignore +0 -0
  11. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/.name +0 -0
  12. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/ColaB-Scan.iml +0 -0
  13. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/inspectionProfiles/profiles_settings.xml +0 -0
  14. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/misc.xml +0 -0
  15. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/modules.xml +0 -0
  16. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/vcs.xml +0 -0
  17. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/LICENSE +0 -0
  18. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/dependencies/rdrpcatch_test_env.yaml +0 -0
  19. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/dependencies/requirements.txt +0 -0
  20. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/images/rdrpcatch_flowchart_v0.png +0 -0
  21. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/__init__.py +0 -0
  22. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/cli/__init__.py +0 -0
  23. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/cli/args.py +0 -0
  24. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/__init__.py +0 -0
  25. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/fetch_dbs.py +0 -0
  26. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/gui.py +0 -0
  27. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py +0 -0
  28. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/paths.py +0 -0
  29. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/plot.py +0 -0
  30. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py +0 -0
  31. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/run_seqkit.py +0 -0
  32. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/meta_4test.yaml +0 -0
  33. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch-1.0.0-py312_2.tar.bz2 +0 -0
  34. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_env.yaml +0 -0
  35. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_gff_files/test_translate_full_aminoacid_rdrpcatch.gff3 +0 -0
  36. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_full_aminoacid_contigs.fasta +0 -0
  37. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_trimmed_aminoacid_contigs.fasta +0 -0
  38. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_output_annotated.tsv +0 -0
  39. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_ID_score_plot.html +0 -0
  40. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_contig_coverage_plot.html +0 -0
  41. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_evalue_plot.html +0 -0
  42. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_contig_plot.html +0 -0
  43. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_plot_profile.html +0 -0
  44. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_profile_coverage_plot.html +0 -0
  45. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_score_plot.html +0 -0
  46. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_upset_plot.png +0 -0
  47. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_Lucaprot_hmm_output_best_hit.txt +0 -0
  48. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_best_hit.txt +0 -0
  49. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp_hmm_output_best_hit.txt +0 -0
  50. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RDRP-scan_hmm_output_best_hit.txt +0 -0
  51. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RVMT_hmm_output_best_hit.txt +0 -0
  52. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_best_hit.txt +0 -0
  53. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_best_hit.txt +0 -0
  54. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_Lucaprot_hmm_output_formatted.txt +0 -0
  55. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_formatted.txt +0 -0
  56. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp_hmm_output_formatted.txt +0 -0
  57. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RDRP-scan_hmm_output_formatted.txt +0 -0
  58. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RVMT_hmm_output_formatted.txt +0 -0
  59. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_formatted.txt +0 -0
  60. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_formatted.txt +0 -0
  61. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.custom.tsv +0 -0
  62. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.txt +0 -0
  63. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.custom.tsv +0 -0
  64. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.txt +0 -0
  65. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.custom.tsv +0 -0
  66. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.txt +0 -0
  67. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.custom.tsv +0 -0
  68. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.txt +0 -0
  69. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.custom.tsv +0 -0
  70. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.txt +0 -0
  71. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.custom.tsv +0 -0
  72. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.txt +0 -0
  73. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.custom.tsv +0 -0
  74. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.txt +0 -0
  75. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_e_search_output/test_translate_mmseqs_e_search.tsv +0 -0
  76. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_lca.tsv +0 -0
  77. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_report +0 -0
  78. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_aln +0 -0
  79. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_report +0 -0
  80. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/tmp/16608414482057878997/easy-taxonomy.sh +0 -0
  81. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_e_search.log +0 -0
  82. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_tax.log +0 -0
  83. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_rdrpcatch.log +0 -0
  84. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_combined.tsv +0 -0
  85. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_rdrpcatch_output.tsv +0 -0
  86. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_upset_data.tsv +0 -0
  87. {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/test_translate.fasta +0 -0
@@ -4,9 +4,7 @@
4
4
  <option name="autoReloadType" value="SELECTIVE" />
5
5
  </component>
6
6
  <component name="ChangeListManager">
7
- <list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Updates:&#10;Update meta.yaml and .toml file&#10;Preparing for Pypi and Bioconda upload">
8
- <change beforePath="$PROJECT_DIR$/meta.yaml" beforeDir="false" afterPath="$PROJECT_DIR$/meta.yaml" afterDir="false" />
9
- </list>
7
+ <list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Updates:&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
10
8
  <option name="SHOW_DIALOG" value="false" />
11
9
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
12
10
  <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
@@ -38,22 +36,22 @@
38
36
  <option name="hideEmptyMiddlePackages" value="true" />
39
37
  <option name="showLibraryContents" value="true" />
40
38
  </component>
41
- <component name="PropertiesComponent">{
42
- &quot;keyToString&quot;: {
43
- &quot;ASKED_ADD_EXTERNAL_FILES&quot;: &quot;true&quot;,
44
- &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
45
- &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
46
- &quot;ignore.virus.scanning.warn.message&quot;: &quot;true&quot;,
47
- &quot;last_opened_file_path&quot;: &quot;C:/Users/karso/PycharmProjects/ColaB-Scan/testing&quot;,
48
- &quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
49
- &quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
50
- &quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
51
- &quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
52
- &quot;nodejs_package_manager_path&quot;: &quot;npm&quot;,
53
- &quot;settings.editor.selected.configurable&quot;: &quot;preferences.pluginManager&quot;,
54
- &quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;
39
+ <component name="PropertiesComponent"><![CDATA[{
40
+ "keyToString": {
41
+ "ASKED_ADD_EXTERNAL_FILES": "true",
42
+ "RunOnceActivity.OpenProjectViewOnStart": "true",
43
+ "RunOnceActivity.ShowReadmeOnStart": "true",
44
+ "ignore.virus.scanning.warn.message": "true",
45
+ "last_opened_file_path": "C:/Users/karso/PycharmProjects/rdrpcatch_benchmarks",
46
+ "node.js.detected.package.eslint": "true",
47
+ "node.js.detected.package.tslint": "true",
48
+ "node.js.selected.package.eslint": "(autodetect)",
49
+ "node.js.selected.package.tslint": "(autodetect)",
50
+ "nodejs_package_manager_path": "npm",
51
+ "settings.editor.selected.configurable": "preferences.pluginManager",
52
+ "vue.rearranger.settings.migration": "true"
55
53
  }
56
- }</component>
54
+ }]]></component>
57
55
  <component name="RecentsManager">
58
56
  <key name="CopyFile.RECENT_KEYS">
59
57
  <recent name="C:\Users\karso\PycharmProjects\ColaB-Scan\testing" />
@@ -117,6 +115,8 @@
117
115
  <workItem from="1743013480572" duration="6846000" />
118
116
  <workItem from="1743683517488" duration="7013000" />
119
117
  <workItem from="1743714892367" duration="21775000" />
118
+ <workItem from="1744200654491" duration="635000" />
119
+ <workItem from="1744241097621" duration="28249000" />
120
120
  </task>
121
121
  <task id="LOCAL-00001" summary="First commit: Script for benchmark">
122
122
  <option name="closed" value="true" />
@@ -334,7 +334,31 @@
334
334
  <option name="project" value="LOCAL" />
335
335
  <updated>1743738696182</updated>
336
336
  </task>
337
- <option name="localTasksCounter" value="28" />
337
+ <task id="LOCAL-00028" summary="Updates:&#10;Optimize fasta writer from O(n*m) to O(n+m)">
338
+ <option name="closed" value="true" />
339
+ <created>1744246540802</created>
340
+ <option name="number" value="00028" />
341
+ <option name="presentableId" value="LOCAL-00028" />
342
+ <option name="project" value="LOCAL" />
343
+ <updated>1744246540804</updated>
344
+ </task>
345
+ <task id="LOCAL-00029" summary="Updates:&#10;Polishing ReadME&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database">
346
+ <option name="closed" value="true" />
347
+ <created>1744793957201</created>
348
+ <option name="number" value="00029" />
349
+ <option name="presentableId" value="LOCAL-00029" />
350
+ <option name="project" value="LOCAL" />
351
+ <updated>1744793957201</updated>
352
+ </task>
353
+ <task id="LOCAL-00030" summary="Updates:&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch">
354
+ <option name="closed" value="true" />
355
+ <created>1744796108058</created>
356
+ <option name="number" value="00030" />
357
+ <option name="presentableId" value="LOCAL-00030" />
358
+ <option name="project" value="LOCAL" />
359
+ <updated>1744796108058</updated>
360
+ </task>
361
+ <option name="localTasksCounter" value="31" />
338
362
  <servers />
339
363
  </component>
340
364
  <component name="TypeScriptGeneratedFilesManager">
@@ -352,7 +376,6 @@
352
376
  </option>
353
377
  </component>
354
378
  <component name="VcsManagerConfiguration">
355
- <MESSAGE value="First commit: Script for benchmark" />
356
379
  <MESSAGE value="Commit: Plots and result summary" />
357
380
  <MESSAGE value="Commit: File name change" />
358
381
  <MESSAGE value="Commit: Upload script and results" />
@@ -375,6 +398,9 @@
375
398
  <MESSAGE value="Updates:&#10;Refactor Database installation to support download and automatic update&#10;Handle empty files &#10;General bug fixes" />
376
399
  <MESSAGE value="Updates:&#10;Change test case" />
377
400
  <MESSAGE value="Updates:&#10;Update meta.yaml and .toml file&#10;Preparing for Pypi and Bioconda upload" />
378
- <option name="LAST_COMMIT_MESSAGE" value="Updates:&#10;Update meta.yaml and .toml file&#10;Preparing for Pypi and Bioconda upload" />
401
+ <MESSAGE value="Updates:&#10;Optimize fasta writer from O(n*m) to O(n+m)" />
402
+ <MESSAGE value="Updates:&#10;Polishing ReadME&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database" />
403
+ <MESSAGE value="Updates:&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
404
+ <option name="LAST_COMMIT_MESSAGE" value="Updates:&#10;Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
379
405
  </component>
380
406
  </project>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdrpcatch
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Dynamic: Summary
5
5
  Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
6
6
  Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
@@ -81,7 +81,7 @@ The dependencies can be installed using conda or mamba. Follow these steps:
81
81
 
82
82
  Create a new conda environment and install the dependencies:
83
83
  ```bash
84
- conda create -n rdrpcatch python=3.12
84
+ conda create -n rdrpcatch python=3.12
85
85
  conda activate rdrpcatch
86
86
  conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
87
87
  ```
@@ -59,7 +59,7 @@ The dependencies can be installed using conda or mamba. Follow these steps:
59
59
 
60
60
  Create a new conda environment and install the dependencies:
61
61
  ```bash
62
- conda create -n rdrpcatch python=3.12
62
+ conda create -n rdrpcatch python=3.12
63
63
  conda activate rdrpcatch
64
64
  conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
65
65
  ```
@@ -1,5 +1,5 @@
1
1
  {% set name = "RdRpCATCH" %}
2
- {% set version = "0.0.2" %}
2
+ {% set version = "0.0.1" %}
3
3
  {% set sha256 = "6454b1a6f98e461ca82d181b1595e5f06448786540b6daa2c4de31c38b255412" %}
4
4
 
5
5
  package:
@@ -13,7 +13,7 @@ source:
13
13
  build:
14
14
  number: 0
15
15
  noarch: python
16
- script: {{ PYTHON }} -m pip install . -vv
16
+ script: {{ PYTHON }} -m pip install . --no-deps --no-build-isolation --no-cache-dir -vvv
17
17
  run_exports:
18
18
  - {{ pin_compatible('rdrpcatch', max_pin='x.x.x') }}
19
19
  entry_points:
@@ -26,17 +26,17 @@ requirements:
26
26
  - pip
27
27
  run:
28
28
  - python >=3.12
29
- - mmseqs2 =17.b804f
30
- - seqkit =2.10.0
31
- - pyhmmer =0.11.0
32
- - needletail =0.6.3
33
- - polars =1.26.0
34
- - rich =13.9.4
35
- - rich-click =1.8.8
36
- - upsetplot =0.9.0
37
- - matplotlib-base =3.10.1
38
- - altair =5.5.0
39
- - requests =2.32.3
29
+ - mmseqs2 ==17.b804f
30
+ - seqkit ==2.10.0
31
+ - pyhmmer ==0.11.0
32
+ - needletail ==0.6.3
33
+ - polars ==1.26.0
34
+ - rich ==13.9.4
35
+ - rich-click ==1.8.8
36
+ - upsetplot ==0.9.0
37
+ - matplotlib-base ==3.10.1
38
+ - altair ==5.5.0
39
+ - requests ==2.32.3
40
40
 
41
41
  test:
42
42
  commands:
@@ -49,7 +49,7 @@ about:
49
49
  license: MIT
50
50
  license_family: MIT
51
51
  license_file: LICENSE
52
- summary: RNA virus RdRp sequence scanner
52
+ summary: "RNA virus RdRp sequence scanner."
53
53
  description: |
54
54
  RdRpCATCH (RNA-dependent RNA polymerase Collaborative Analysis Tool with Collections of pHMMs)
55
55
  is a tool for scanning sequences for RNA-dependent RNA polymerases (RdRps) using profile HMMs.
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rdrpcatch"
7
- version = "0.0.3"
7
+ version = "0.0.5"
8
8
  authors = [
9
9
  {name = "Dimitris Karapliafis", email = "dimitris.karapliafis@wur.nl"},
10
10
  {name = "Uri Neri", email = "uneri@lbl.gov"},
@@ -68,7 +68,7 @@ class hmmsearch_formatter:
68
68
  # Check if the dataframe is empty
69
69
  if data_df.is_empty():
70
70
  title_line= ['Contig_name', 'Translated_contig_name (frame)', 'Sequence_length(AA)', 'Profile_name',
71
- 'Profile_length', 'E-value', 'score', 'acc', 'norm_bitscore_profile',
71
+ 'Profile_length', 'E-value', 'score','norm_bitscore_profile',
72
72
  'norm_bitscore_contig', 'ID_score', 'RdRp_from(AA)', 'RdRp_to(AA)', 'profile_coverage',
73
73
  'contig_coverage']
74
74
  data_df = pl.DataFrame({col: [] for col in title_line})
@@ -255,33 +255,48 @@ class fasta:
255
255
  self.logger.silent_log(f"Processing {len(rdrp_coords_list)} coordinates")
256
256
  self.logger.silent_log(f"First few coordinates: {rdrp_coords_list[:3]}")
257
257
 
258
+ contig_dict = {}
259
+ for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
260
+ contig_key = str(contig_name).strip()
261
+ if contig_key not in contig_dict:
262
+ contig_dict[contig_key] = []
263
+ contig_dict[contig_key].append((rdrp_from, rdrp_to))
264
+
258
265
  reader = needletail.parse_fastx_file(self.fasta_file)
259
266
  matches_found = 0
260
267
  with open(outfile, 'w') as out_handle:
261
268
  for record in reader:
262
- # pyhmmer uses the first word of the header as the ID, so split on whitespace
269
+ # Get the record ID
263
270
  record_id = record.id.strip().split(" ")[0]
264
- if self.logger:
265
- self.logger.silent_log(f"Processing record with ID: '{record_id}'")
266
- for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
267
- contig_name = str(contig_name).strip()
271
+
272
+ # Check if this record matches any of our target contigs
273
+ if record_id in contig_dict:
268
274
  if self.logger:
269
- self.logger.silent_log(f"Comparing record '{record_id}' with contig '{contig_name}'")
270
- if record_id == contig_name:
271
- matches_found += 1
272
- seq = record.seq[rdrp_from-1:rdrp_to]
275
+ self.logger.silent_log(f"Match found for record ID: '{record_id}'")
276
+
277
+ # Process all matching coordinates for this contig
278
+ for rdrp_from, rdrp_to in contig_dict[record_id]:
279
+ seq = record.seq[rdrp_from - 1:rdrp_to]
273
280
  fasta_header = f"{record_id}_RdRp_{rdrp_from}-{rdrp_to}"
274
281
  out_handle.write(f">{fasta_header}\n{seq}\n")
282
+ matches_found += 1
283
+
284
+ # Remove the processed contig to avoid future checks
285
+ del contig_dict[record_id]
286
+
287
+ # If all contigs have been found, exit early
288
+ if not contig_dict:
275
289
  if self.logger:
276
- self.logger.silent_log(f"Match found! Writing sequence of length {len(seq)}")
277
- else:
278
- if self.logger:
279
- self.logger.silent_log(f"No match - lengths: {len(record_id)}|{len(contig_name)}, "
280
- f"record_id bytes: {record_id.encode()}, contig bytes: {contig_name.encode()}")
281
-
290
+ self.logger.silent_log("All contigs processed. Exiting early.")
291
+ break
292
+
282
293
  if self.logger:
283
294
  self.logger.silent_log(f"Total matches found: {matches_found}")
284
295
 
296
+ return matches_found
297
+
298
+
299
+
285
300
 
286
301
  class mmseqs_parser:
287
302
 
@@ -111,7 +111,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
111
111
  if not os.path.exists(outputs.output_dir):
112
112
  os.makedirs(outputs.output_dir)
113
113
  else:
114
- raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, please choose a different directory.")
114
+ raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory.")
115
115
  if not os.path.exists(outputs.log_dir):
116
116
  os.makedirs(outputs.log_dir)
117
117
 
@@ -363,7 +363,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
363
363
 
364
364
 
365
365
  # Combine all the dataframes in the list
366
- combined_df = pl.concat(df_list, how='vertical')
366
+ combined_df = pl.concat(df_list, how='vertical_relaxed')
367
367
  # Write the combined dataframe to a tsv file
368
368
  for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
369
369
  'ID_score', 'profile_coverage', 'contig_coverage']:
@@ -526,9 +526,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
526
526
  outputs.tsv_outdir.mkdir(parents=True)
527
527
 
528
528
 
529
-
530
529
  # Combine all the dataframes in the list
531
- combined_df = pl.concat(df_list, how='vertical')
530
+ combined_df = pl.concat(df_list, how='vertical_relaxed')
532
531
  # Write the combined dataframe to a tsv file
533
532
  for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
534
533
  'ID_score', 'profile_coverage', 'contig_coverage']:
@@ -581,9 +580,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
581
580
  utils.fasta(input_file).write_fasta(utils.fasta(input_file).extract_contigs(combined_set), outputs.fasta_prot_out_path)
582
581
 
583
582
  if verbose:
584
- logger.loud_log(f"Contigs written to: {outputs.fasta_prot_out_path}")
583
+ logger.loud_log(f"Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
585
584
  else:
586
- logger.silent_log(f"Contigs written to: {outputs.fasta_prot_out_path}")
585
+ logger.silent_log(f" Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
587
586
 
588
587
  if not os.path.exists(outputs.gff_output_dir):
589
588
  outputs.gff_output_dir.mkdir(parents=True)
@@ -594,9 +593,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
594
593
  utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
595
594
 
596
595
  if verbose:
597
- logger.loud_log(f"RdRpCATCH output file written to: {outputs.fasta_prot_out_path}")
596
+ logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
598
597
  else:
599
- logger.silent_log(f"RdRpCATCH output file written to: {outputs.fasta_prot_out_path}")
598
+ logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
600
599
 
601
600
  if not os.path.exists(outputs.mmseqs_tax_output_dir):
602
601
  outputs.mmseqs_tax_output_dir.mkdir(parents=True)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes