rdrpcatch 0.0.3__tar.gz → 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/workspace.xml +47 -21
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/PKG-INFO +2 -2
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/README.md +1 -1
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/meta.yaml +14 -14
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/pyproject.toml +1 -1
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py +1 -1
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/utils.py +30 -15
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_wrapper.py +7 -8
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.gitignore +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/.gitignore +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/.name +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/ColaB-Scan.iml +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/misc.xml +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/modules.xml +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/.idea/vcs.xml +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/LICENSE +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/dependencies/rdrpcatch_test_env.yaml +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/dependencies/requirements.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/images/rdrpcatch_flowchart_v0.png +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/__init__.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/cli/__init__.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/cli/args.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/__init__.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/fetch_dbs.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/gui.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/paths.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/plot.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/rdrpcatch/rdrpcatch_scripts/run_seqkit.py +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/meta_4test.yaml +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch-1.0.0-py312_2.tar.bz2 +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_env.yaml +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_gff_files/test_translate_full_aminoacid_rdrpcatch.gff3 +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_full_aminoacid_contigs.fasta +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_fasta/test_translate_trimmed_aminoacid_contigs.fasta +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_output_annotated.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_ID_score_plot.html +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_contig_coverage_plot.html +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_evalue_plot.html +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_contig_plot.html +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_norm_bitscore_plot_profile.html +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_profile_coverage_plot.html +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_score_plot.html +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/test_translate_rdrpcatch_plots/test_translate_upset_plot.png +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_Lucaprot_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_NeoRdRp_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RDRP-scan_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_RVMT_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/best_hit_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_best_hit.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_Lucaprot_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp.2.1_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_NeoRdRp_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RDRP-scan_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_RVMT_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_fam_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/formatted_hmm_output/test_translate_TSA_Olendraite_gen_hmm_output_formatted.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_Lucaprot_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp.2.1_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_NeoRdRp_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RDRP-scan_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_RVMT_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_fam_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.custom.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/hmm_output/test_translate_TSA_Olendraite_gen_hmmsearch_output.txt +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_e_search_output/test_translate_mmseqs_e_search.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_lca.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_report +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_aln +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/test_translate_mmseqs_tax_tophit_report +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/mmseqs_tax_output/tmp/16608414482057878997/easy-taxonomy.sh +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_e_search.log +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_mmseqs_tax.log +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/test_translate_logs/test_translate_rdrpcatch.log +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_combined.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_rdrpcatch_output.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/rdrpcatch_test_translate/tmp/tsv_files/test_translate_upset_data.tsv +0 -0
- {rdrpcatch-0.0.3 → rdrpcatch-0.0.5}/testing/test_translate.fasta +0 -0
|
@@ -4,9 +4,7 @@
|
|
|
4
4
|
<option name="autoReloadType" value="SELECTIVE" />
|
|
5
5
|
</component>
|
|
6
6
|
<component name="ChangeListManager">
|
|
7
|
-
<list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Updates:
|
|
8
|
-
<change beforePath="$PROJECT_DIR$/meta.yaml" beforeDir="false" afterPath="$PROJECT_DIR$/meta.yaml" afterDir="false" />
|
|
9
|
-
</list>
|
|
7
|
+
<list default="true" id="d849e6fa-87f9-4e92-9c33-abef7cc975d3" name="Changes" comment="Updates: Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
|
|
10
8
|
<option name="SHOW_DIALOG" value="false" />
|
|
11
9
|
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
|
12
10
|
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
|
@@ -38,22 +36,22 @@
|
|
|
38
36
|
<option name="hideEmptyMiddlePackages" value="true" />
|
|
39
37
|
<option name="showLibraryContents" value="true" />
|
|
40
38
|
</component>
|
|
41
|
-
<component name="PropertiesComponent"
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
39
|
+
<component name="PropertiesComponent"><![CDATA[{
|
|
40
|
+
"keyToString": {
|
|
41
|
+
"ASKED_ADD_EXTERNAL_FILES": "true",
|
|
42
|
+
"RunOnceActivity.OpenProjectViewOnStart": "true",
|
|
43
|
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
|
44
|
+
"ignore.virus.scanning.warn.message": "true",
|
|
45
|
+
"last_opened_file_path": "C:/Users/karso/PycharmProjects/rdrpcatch_benchmarks",
|
|
46
|
+
"node.js.detected.package.eslint": "true",
|
|
47
|
+
"node.js.detected.package.tslint": "true",
|
|
48
|
+
"node.js.selected.package.eslint": "(autodetect)",
|
|
49
|
+
"node.js.selected.package.tslint": "(autodetect)",
|
|
50
|
+
"nodejs_package_manager_path": "npm",
|
|
51
|
+
"settings.editor.selected.configurable": "preferences.pluginManager",
|
|
52
|
+
"vue.rearranger.settings.migration": "true"
|
|
55
53
|
}
|
|
56
|
-
}
|
|
54
|
+
}]]></component>
|
|
57
55
|
<component name="RecentsManager">
|
|
58
56
|
<key name="CopyFile.RECENT_KEYS">
|
|
59
57
|
<recent name="C:\Users\karso\PycharmProjects\ColaB-Scan\testing" />
|
|
@@ -117,6 +115,8 @@
|
|
|
117
115
|
<workItem from="1743013480572" duration="6846000" />
|
|
118
116
|
<workItem from="1743683517488" duration="7013000" />
|
|
119
117
|
<workItem from="1743714892367" duration="21775000" />
|
|
118
|
+
<workItem from="1744200654491" duration="635000" />
|
|
119
|
+
<workItem from="1744241097621" duration="28249000" />
|
|
120
120
|
</task>
|
|
121
121
|
<task id="LOCAL-00001" summary="First commit: Script for benchmark">
|
|
122
122
|
<option name="closed" value="true" />
|
|
@@ -334,7 +334,31 @@
|
|
|
334
334
|
<option name="project" value="LOCAL" />
|
|
335
335
|
<updated>1743738696182</updated>
|
|
336
336
|
</task>
|
|
337
|
-
<
|
|
337
|
+
<task id="LOCAL-00028" summary="Updates: Optimize fasta writer from O(n*m) to O(n+m)">
|
|
338
|
+
<option name="closed" value="true" />
|
|
339
|
+
<created>1744246540802</created>
|
|
340
|
+
<option name="number" value="00028" />
|
|
341
|
+
<option name="presentableId" value="LOCAL-00028" />
|
|
342
|
+
<option name="project" value="LOCAL" />
|
|
343
|
+
<updated>1744246540804</updated>
|
|
344
|
+
</task>
|
|
345
|
+
<task id="LOCAL-00029" summary="Updates: Polishing ReadME Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database">
|
|
346
|
+
<option name="closed" value="true" />
|
|
347
|
+
<created>1744793957201</created>
|
|
348
|
+
<option name="number" value="00029" />
|
|
349
|
+
<option name="presentableId" value="LOCAL-00029" />
|
|
350
|
+
<option name="project" value="LOCAL" />
|
|
351
|
+
<updated>1744793957201</updated>
|
|
352
|
+
</task>
|
|
353
|
+
<task id="LOCAL-00030" summary="Updates: Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch">
|
|
354
|
+
<option name="closed" value="true" />
|
|
355
|
+
<created>1744796108058</created>
|
|
356
|
+
<option name="number" value="00030" />
|
|
357
|
+
<option name="presentableId" value="LOCAL-00030" />
|
|
358
|
+
<option name="project" value="LOCAL" />
|
|
359
|
+
<updated>1744796108058</updated>
|
|
360
|
+
</task>
|
|
361
|
+
<option name="localTasksCounter" value="31" />
|
|
338
362
|
<servers />
|
|
339
363
|
</component>
|
|
340
364
|
<component name="TypeScriptGeneratedFilesManager">
|
|
@@ -352,7 +376,6 @@
|
|
|
352
376
|
</option>
|
|
353
377
|
</component>
|
|
354
378
|
<component name="VcsManagerConfiguration">
|
|
355
|
-
<MESSAGE value="First commit: Script for benchmark" />
|
|
356
379
|
<MESSAGE value="Commit: Plots and result summary" />
|
|
357
380
|
<MESSAGE value="Commit: File name change" />
|
|
358
381
|
<MESSAGE value="Commit: Upload script and results" />
|
|
@@ -375,6 +398,9 @@
|
|
|
375
398
|
<MESSAGE value="Updates: Refactor Database installation to support download and automatic update Handle empty files General bug fixes" />
|
|
376
399
|
<MESSAGE value="Updates: Change test case" />
|
|
377
400
|
<MESSAGE value="Updates: Update meta.yaml and .toml file Preparing for Pypi and Bioconda upload" />
|
|
378
|
-
<
|
|
401
|
+
<MESSAGE value="Updates: Optimize fasta writer from O(n*m) to O(n+m)" />
|
|
402
|
+
<MESSAGE value="Updates: Polishing ReadME Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database" />
|
|
403
|
+
<MESSAGE value="Updates: Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
|
|
404
|
+
<option name="LAST_COMMIT_MESSAGE" value="Updates: Fixed bug that crushed the script when at least one pHMM DB does not have a match against the sequence database also for nuc branch" />
|
|
379
405
|
</component>
|
|
380
406
|
</project>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rdrpcatch
|
|
3
|
-
Version: 0.0.3
|
|
3
|
+
Version: 0.0.5
|
|
4
4
|
Dynamic: Summary
|
|
5
5
|
Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
6
6
|
Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
@@ -81,7 +81,7 @@ The dependencies can be installed using conda or mamba. Follow these steps:
|
|
|
81
81
|
|
|
82
82
|
Create a new conda environment and install the dependencies:
|
|
83
83
|
```bash
|
|
84
|
-
conda create -n rdrpcatch python=3.12
|
|
84
|
+
conda env create -n rdrpcatch python=3.12
|
|
85
85
|
conda activate rdrpcatch
|
|
86
86
|
conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
|
|
87
87
|
```
|
|
@@ -59,7 +59,7 @@ The dependencies can be installed using conda or mamba. Follow these steps:
|
|
|
59
59
|
|
|
60
60
|
Create a new conda environment and install the dependencies:
|
|
61
61
|
```bash
|
|
62
|
-
conda create -n rdrpcatch python=3.12
|
|
62
|
+
conda env create -n rdrpcatch python=3.12
|
|
63
63
|
conda activate rdrpcatch
|
|
64
64
|
conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
|
|
65
65
|
```
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{% set name = "RdRpCATCH" %}
|
|
2
|
-
{% set version = "0.0.
|
|
2
|
+
{% set version = "0.0.1" %}
|
|
3
3
|
{% set sha256 = "6454b1a6f98e461ca82d181b1595e5f06448786540b6daa2c4de31c38b255412" %}
|
|
4
4
|
|
|
5
5
|
package:
|
|
@@ -13,7 +13,7 @@ source:
|
|
|
13
13
|
build:
|
|
14
14
|
number: 0
|
|
15
15
|
noarch: python
|
|
16
|
-
script: {{ PYTHON }} -m pip install . -
|
|
16
|
+
script: {{ PYTHON }} -m pip install . --no-deps --no-build-isolation --no-cache-dir -vvv
|
|
17
17
|
run_exports:
|
|
18
18
|
- {{ pin_compatible('rdrpcatch', max_pin='x.x.x') }}
|
|
19
19
|
entry_points:
|
|
@@ -26,17 +26,17 @@ requirements:
|
|
|
26
26
|
- pip
|
|
27
27
|
run:
|
|
28
28
|
- python >=3.12
|
|
29
|
-
- mmseqs2
|
|
30
|
-
- seqkit
|
|
31
|
-
- pyhmmer
|
|
32
|
-
- needletail
|
|
33
|
-
- polars
|
|
34
|
-
- rich
|
|
35
|
-
- rich-click
|
|
36
|
-
- upsetplot
|
|
37
|
-
- matplotlib-base
|
|
38
|
-
- altair
|
|
39
|
-
- requests
|
|
29
|
+
- mmseqs2 ==17.b804f
|
|
30
|
+
- seqkit ==2.10.0
|
|
31
|
+
- pyhmmer ==0.11.0
|
|
32
|
+
- needletail ==0.6.3
|
|
33
|
+
- polars ==1.26.0
|
|
34
|
+
- rich ==13.9.4
|
|
35
|
+
- rich-click ==1.8.8
|
|
36
|
+
- upsetplot ==0.9.0
|
|
37
|
+
- matplotlib-base ==3.10.1
|
|
38
|
+
- altair ==5.5.0
|
|
39
|
+
- requests ==2.32.3
|
|
40
40
|
|
|
41
41
|
test:
|
|
42
42
|
commands:
|
|
@@ -49,7 +49,7 @@ about:
|
|
|
49
49
|
license: MIT
|
|
50
50
|
license_family: MIT
|
|
51
51
|
license_file: LICENSE
|
|
52
|
-
summary: RNA virus RdRp sequence scanner
|
|
52
|
+
summary: "RNA virus RdRp sequence scanner."
|
|
53
53
|
description: |
|
|
54
54
|
RdRpCATCH (RNA-dependent RNA polymerase Collaborative Analysis Tool with Collections of pHMMs)
|
|
55
55
|
is a tool for scanning sequences for RNA-dependent RNA polymerases (RdRps) using profile HMMs.
|
|
@@ -68,7 +68,7 @@ class hmmsearch_formatter:
|
|
|
68
68
|
# Check if the dataframe is empty
|
|
69
69
|
if data_df.is_empty():
|
|
70
70
|
title_line= ['Contig_name', 'Translated_contig_name (frame)', 'Sequence_length(AA)', 'Profile_name',
|
|
71
|
-
'Profile_length', 'E-value', 'score',
|
|
71
|
+
'Profile_length', 'E-value', 'score','norm_bitscore_profile',
|
|
72
72
|
'norm_bitscore_contig', 'ID_score', 'RdRp_from(AA)', 'RdRp_to(AA)', 'profile_coverage',
|
|
73
73
|
'contig_coverage']
|
|
74
74
|
data_df = pl.DataFrame({col: [] for col in title_line})
|
|
@@ -255,33 +255,48 @@ class fasta:
|
|
|
255
255
|
self.logger.silent_log(f"Processing {len(rdrp_coords_list)} coordinates")
|
|
256
256
|
self.logger.silent_log(f"First few coordinates: {rdrp_coords_list[:3]}")
|
|
257
257
|
|
|
258
|
+
contig_dict = {}
|
|
259
|
+
for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
|
|
260
|
+
contig_key = str(contig_name).strip()
|
|
261
|
+
if contig_key not in contig_dict:
|
|
262
|
+
contig_dict[contig_key] = []
|
|
263
|
+
contig_dict[contig_key].append((rdrp_from, rdrp_to))
|
|
264
|
+
|
|
258
265
|
reader = needletail.parse_fastx_file(self.fasta_file)
|
|
259
266
|
matches_found = 0
|
|
260
267
|
with open(outfile, 'w') as out_handle:
|
|
261
268
|
for record in reader:
|
|
262
|
-
#
|
|
269
|
+
# Get the record ID
|
|
263
270
|
record_id = record.id.strip().split(" ")[0]
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
contig_name = str(contig_name).strip()
|
|
271
|
+
|
|
272
|
+
# Check if this record matches any of our target contigs
|
|
273
|
+
if record_id in contig_dict:
|
|
268
274
|
if self.logger:
|
|
269
|
-
self.logger.silent_log(f"
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
275
|
+
self.logger.silent_log(f"Match found for record ID: '{record_id}'")
|
|
276
|
+
|
|
277
|
+
# Process all matching coordinates for this contig
|
|
278
|
+
for rdrp_from, rdrp_to in contig_dict[record_id]:
|
|
279
|
+
seq = record.seq[rdrp_from - 1:rdrp_to]
|
|
273
280
|
fasta_header = f"{record_id}_RdRp_{rdrp_from}-{rdrp_to}"
|
|
274
281
|
out_handle.write(f">{fasta_header}\n{seq}\n")
|
|
282
|
+
matches_found += 1
|
|
283
|
+
|
|
284
|
+
# Remove the processed contig to avoid future checks
|
|
285
|
+
del contig_dict[record_id]
|
|
286
|
+
|
|
287
|
+
# If all contigs have been found, exit early
|
|
288
|
+
if not contig_dict:
|
|
275
289
|
if self.logger:
|
|
276
|
-
self.logger.silent_log(
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
self.logger.silent_log(f"No match - lengths: {len(record_id)}|{len(contig_name)}, "
|
|
280
|
-
f"record_id bytes: {record_id.encode()}, contig bytes: {contig_name.encode()}")
|
|
281
|
-
|
|
290
|
+
self.logger.silent_log("All contigs processed. Exiting early.")
|
|
291
|
+
break
|
|
292
|
+
|
|
282
293
|
if self.logger:
|
|
283
294
|
self.logger.silent_log(f"Total matches found: {matches_found}")
|
|
284
295
|
|
|
296
|
+
return matches_found
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
285
300
|
|
|
286
301
|
class mmseqs_parser:
|
|
287
302
|
|
|
@@ -111,7 +111,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
111
111
|
if not os.path.exists(outputs.output_dir):
|
|
112
112
|
os.makedirs(outputs.output_dir)
|
|
113
113
|
else:
|
|
114
|
-
raise FileExistsError(f"Output directory already exists: {outputs.output_dir},
|
|
114
|
+
raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory.")
|
|
115
115
|
if not os.path.exists(outputs.log_dir):
|
|
116
116
|
os.makedirs(outputs.log_dir)
|
|
117
117
|
|
|
@@ -363,7 +363,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
363
363
|
|
|
364
364
|
|
|
365
365
|
# Combine all the dataframes in the list
|
|
366
|
-
combined_df = pl.concat(df_list, how='
|
|
366
|
+
combined_df = pl.concat(df_list, how='vertical_relaxed')
|
|
367
367
|
# Write the combined dataframe to a tsv file
|
|
368
368
|
for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
|
|
369
369
|
'ID_score', 'profile_coverage', 'contig_coverage']:
|
|
@@ -526,9 +526,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
526
526
|
outputs.tsv_outdir.mkdir(parents=True)
|
|
527
527
|
|
|
528
528
|
|
|
529
|
-
|
|
530
529
|
# Combine all the dataframes in the list
|
|
531
|
-
combined_df = pl.concat(df_list, how='
|
|
530
|
+
combined_df = pl.concat(df_list, how='vertical_relaxed')
|
|
532
531
|
# Write the combined dataframe to a tsv file
|
|
533
532
|
for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
|
|
534
533
|
'ID_score', 'profile_coverage', 'contig_coverage']:
|
|
@@ -581,9 +580,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
581
580
|
utils.fasta(input_file).write_fasta(utils.fasta(input_file).extract_contigs(combined_set), outputs.fasta_prot_out_path)
|
|
582
581
|
|
|
583
582
|
if verbose:
|
|
584
|
-
logger.loud_log(f"
|
|
583
|
+
logger.loud_log(f"Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
|
|
585
584
|
else:
|
|
586
|
-
logger.silent_log(f"
|
|
585
|
+
logger.silent_log(f" Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
|
|
587
586
|
|
|
588
587
|
if not os.path.exists(outputs.gff_output_dir):
|
|
589
588
|
outputs.gff_output_dir.mkdir(parents=True)
|
|
@@ -594,9 +593,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
594
593
|
utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
|
|
595
594
|
|
|
596
595
|
if verbose:
|
|
597
|
-
logger.loud_log(f"
|
|
596
|
+
logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
598
597
|
else:
|
|
599
|
-
logger.silent_log(f"
|
|
598
|
+
logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
600
599
|
|
|
601
600
|
if not os.path.exists(outputs.mmseqs_tax_output_dir):
|
|
602
601
|
outputs.mmseqs_tax_output_dir.mkdir(parents=True)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|