XspecT 0.2.7__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic. Click here for more details.
- {xspect-0.2.7 → xspect-0.4.0}/.github/workflows/test.yml +1 -1
- {xspect-0.2.7/src/XspecT.egg-info → xspect-0.4.0}/PKG-INFO +14 -21
- {xspect-0.2.7 → xspect-0.4.0}/README.md +11 -19
- {xspect-0.2.7 → xspect-0.4.0}/pyproject.toml +1 -1
- {xspect-0.2.7 → xspect-0.4.0/src/XspecT.egg-info}/PKG-INFO +14 -21
- {xspect-0.2.7 → xspect-0.4.0}/src/XspecT.egg-info/SOURCES.txt +2 -11
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/definitions.py +0 -7
- xspect-0.4.0/src/xspect/download_models.py +34 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/fastapi.py +23 -26
- xspect-0.4.0/src/xspect/file_io.py +171 -0
- xspect-0.4.0/src/xspect/main.py +416 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/mlst_feature/mlst_helper.py +4 -6
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/model_management.py +6 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/models/probabilistic_filter_model.py +16 -5
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/models/probabilistic_filter_svm_model.py +33 -18
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/models/probabilistic_single_filter_model.py +8 -1
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/models/result.py +14 -60
- xspect-0.4.0/src/xspect/ncbi.py +265 -0
- xspect-0.4.0/src/xspect/train.py +283 -0
- {xspect-0.2.7 → xspect-0.4.0}/tests/conftest.py +3 -7
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_cli.py +46 -14
- xspect-0.4.0/tests/test_ncbi.py +94 -0
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_probabilistic_filter_model.py +3 -3
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_probabilistic_filter_svm_model.py +8 -5
- xspect-0.4.0/tests/test_train.py +33 -0
- xspect-0.2.7/src/xspect/download_models.py +0 -33
- xspect-0.2.7/src/xspect/file_io.py +0 -87
- xspect-0.2.7/src/xspect/main.py +0 -181
- xspect-0.2.7/src/xspect/pipeline.py +0 -201
- xspect-0.2.7/src/xspect/run.py +0 -38
- xspect-0.2.7/src/xspect/train.py +0 -267
- xspect-0.2.7/src/xspect/train_filter/create_svm.py +0 -45
- xspect-0.2.7/src/xspect/train_filter/extract_and_concatenate.py +0 -124
- xspect-0.2.7/src/xspect/train_filter/ncbi_api/__init__.py +0 -0
- xspect-0.2.7/src/xspect/train_filter/ncbi_api/download_assemblies.py +0 -31
- xspect-0.2.7/src/xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +0 -110
- xspect-0.2.7/src/xspect/train_filter/ncbi_api/ncbi_children_tree.py +0 -53
- xspect-0.2.7/src/xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +0 -55
- xspect-0.2.7/tests/__init__.py +0 -0
- xspect-0.2.7/tests/test_pipeline.py +0 -26
- xspect-0.2.7/tests/test_train.py +0 -15
- {xspect-0.2.7 → xspect-0.4.0}/.github/workflows/black.yml +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/.github/workflows/docs.yml +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/.github/workflows/pylint.yml +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/.github/workflows/pypi.yml +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/.gitignore +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/LICENSE +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/About.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/AddFilter.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/AddSpecies1.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/AddSpecies2.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/BF.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/ClAssT_Ergebnis1.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/ClAssT_Ergebnis2.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/ClAssT_Ergebnis3.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/ClAssT_Hauptseite.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/CommandLine_Input.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/CommandLine_results.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/CommandLine_whole.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/How2Use.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/HowtouseAspecT.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/XspecT_Ergebnis1.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/XspecT_Ergebnis2.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/XspecT_Ergebnis3.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/XspecT_Ergebnis4.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/XspecT_Hauptseite.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/XspecT_Runtime.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/XspecT_Runtime_Oxa.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/XspecT_Startseite.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/change_pw.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/modify_vecs.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Instructions/pictures/secretkey.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/Makefile +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/cli.md +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/conf.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/diagrams/probabilistic_filter_models.md +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/img/logo.png +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/index.md +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/input_data.md +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/installation.md +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/make.bat +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/quickstart.md +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/docs/web.md +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/setup.cfg +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/XspecT.egg-info/dependency_links.txt +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/XspecT.egg-info/entry_points.txt +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/XspecT.egg-info/requires.txt +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/XspecT.egg-info/top_level.txt +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/__init__.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/mlst_feature/__init__.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/mlst_feature/pub_mlst_handler.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/models/__init__.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/src/xspect/models/probabilistic_filter_mlst_model.py +0 -0
- {xspect-0.2.7/src/xspect/train_filter → xspect-0.4.0/tests}/__init__.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_file_io.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_model_management.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_model_result.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_probabilisitc_filter_mlst_model.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_probabilistic_single_filter_model.py +0 -0
- {xspect-0.2.7 → xspect-0.4.0}/tests/test_pub_mlst_handler.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -54,34 +54,27 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
|
|
|
54
54
|
Provides-Extra: test
|
|
55
55
|
Requires-Dist: pytest; extra == "test"
|
|
56
56
|
Requires-Dist: pytest-cov; extra == "test"
|
|
57
|
+
Dynamic: license-file
|
|
57
58
|
|
|
58
59
|
# XspecT - Acinetobacter Species Assignment Tool
|
|
60
|
+
<!-- start intro -->
|
|
59
61
|

|
|
60
62
|
[](https://github.com/pylint-dev/pylint)
|
|
61
63
|
[](https://github.com/psf/black)
|
|
62
64
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
<!-- start intro -->
|
|
66
|
-
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
|
|
67
|
-
<br/><br/>
|
|
68
|
-
|
|
69
|
-
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction the results are classified using a Support Vector Machine.
|
|
70
|
-
<br/>
|
|
65
|
+
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
|
|
71
66
|
|
|
72
|
-
|
|
73
|
-
<br/>
|
|
67
|
+
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input data to a reference database. Bloom filters ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
74
68
|
|
|
75
|
-
The tool is available as a web-based application and a
|
|
69
|
+
The tool is available as a web-based application and as a command line interface.
|
|
76
70
|
|
|
77
|
-
[
|
|
71
|
+
[kmer indices]: https://arxiv.org/abs/1905.09624
|
|
78
72
|
[Support Vector Machine]: https://en.wikipedia.org/wiki/Support-vector_machine
|
|
79
|
-
[blaOxa-genes]: https://en.wikipedia.org/wiki/Beta-lactamase#OXA_beta-lactamases_(class_D)
|
|
80
73
|
<!-- end intro -->
|
|
81
74
|
|
|
82
75
|
<!-- start quickstart -->
|
|
83
76
|
## Installation
|
|
84
|
-
To install
|
|
77
|
+
To install XspecT, please download the latest 64-bit Python version and install the package using pip:
|
|
85
78
|
```
|
|
86
79
|
pip install xspect
|
|
87
80
|
```
|
|
@@ -91,23 +84,23 @@ Please note that Windows and Alpine Linux is currently not supported.
|
|
|
91
84
|
### Get the models
|
|
92
85
|
To download basic pre-trained models, you can use the built-in command:
|
|
93
86
|
```
|
|
94
|
-
xspect download
|
|
87
|
+
xspect models download
|
|
95
88
|
```
|
|
96
89
|
Additional species models can be trained using:
|
|
97
90
|
```
|
|
98
|
-
xspect train
|
|
91
|
+
xspect models train ncbi
|
|
99
92
|
```
|
|
100
93
|
|
|
101
94
|
### How to run the web app
|
|
102
95
|
To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
|
|
103
96
|
```
|
|
104
|
-
xspect
|
|
97
|
+
xspect web
|
|
105
98
|
```
|
|
106
99
|
|
|
107
100
|
### How to use the XspecT command line interface
|
|
108
|
-
Run
|
|
101
|
+
Run XspecT with the configuration you want to run it with as arguments.
|
|
109
102
|
```
|
|
110
|
-
xspect classify
|
|
103
|
+
xspect classify species
|
|
111
104
|
```
|
|
112
105
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
113
106
|
```
|
|
@@ -1,30 +1,22 @@
|
|
|
1
1
|
# XspecT - Acinetobacter Species Assignment Tool
|
|
2
|
+
<!-- start intro -->
|
|
2
3
|

|
|
3
4
|
[](https://github.com/pylint-dev/pylint)
|
|
4
5
|
[](https://github.com/psf/black)
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
<!-- start intro -->
|
|
9
|
-
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
|
|
10
|
-
<br/><br/>
|
|
11
|
-
|
|
12
|
-
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction the results are classified using a Support Vector Machine.
|
|
13
|
-
<br/>
|
|
7
|
+
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
|
|
14
8
|
|
|
15
|
-
|
|
16
|
-
<br/>
|
|
9
|
+
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input data to a reference database. Bloom filters ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
17
10
|
|
|
18
|
-
The tool is available as a web-based application and a
|
|
11
|
+
The tool is available as a web-based application and as a command line interface.
|
|
19
12
|
|
|
20
|
-
[
|
|
13
|
+
[kmer indices]: https://arxiv.org/abs/1905.09624
|
|
21
14
|
[Support Vector Machine]: https://en.wikipedia.org/wiki/Support-vector_machine
|
|
22
|
-
[blaOxa-genes]: https://en.wikipedia.org/wiki/Beta-lactamase#OXA_beta-lactamases_(class_D)
|
|
23
15
|
<!-- end intro -->
|
|
24
16
|
|
|
25
17
|
<!-- start quickstart -->
|
|
26
18
|
## Installation
|
|
27
|
-
To install
|
|
19
|
+
To install XspecT, please download the latest 64-bit Python version and install the package using pip:
|
|
28
20
|
```
|
|
29
21
|
pip install xspect
|
|
30
22
|
```
|
|
@@ -34,23 +26,23 @@ Please note that Windows and Alpine Linux is currently not supported.
|
|
|
34
26
|
### Get the models
|
|
35
27
|
To download basic pre-trained models, you can use the built-in command:
|
|
36
28
|
```
|
|
37
|
-
xspect download
|
|
29
|
+
xspect models download
|
|
38
30
|
```
|
|
39
31
|
Additional species models can be trained using:
|
|
40
32
|
```
|
|
41
|
-
xspect train
|
|
33
|
+
xspect models train ncbi
|
|
42
34
|
```
|
|
43
35
|
|
|
44
36
|
### How to run the web app
|
|
45
37
|
To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
|
|
46
38
|
```
|
|
47
|
-
xspect
|
|
39
|
+
xspect web
|
|
48
40
|
```
|
|
49
41
|
|
|
50
42
|
### How to use the XspecT command line interface
|
|
51
|
-
Run
|
|
43
|
+
Run XspecT with the configuration you want to run it with as arguments.
|
|
52
44
|
```
|
|
53
|
-
xspect classify
|
|
45
|
+
xspect classify species
|
|
54
46
|
```
|
|
55
47
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
56
48
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -54,34 +54,27 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
|
|
|
54
54
|
Provides-Extra: test
|
|
55
55
|
Requires-Dist: pytest; extra == "test"
|
|
56
56
|
Requires-Dist: pytest-cov; extra == "test"
|
|
57
|
+
Dynamic: license-file
|
|
57
58
|
|
|
58
59
|
# XspecT - Acinetobacter Species Assignment Tool
|
|
60
|
+
<!-- start intro -->
|
|
59
61
|

|
|
60
62
|
[](https://github.com/pylint-dev/pylint)
|
|
61
63
|
[](https://github.com/psf/black)
|
|
62
64
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
<!-- start intro -->
|
|
66
|
-
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
|
|
67
|
-
<br/><br/>
|
|
68
|
-
|
|
69
|
-
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction the results are classified using a Support Vector Machine.
|
|
70
|
-
<br/>
|
|
65
|
+
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
|
|
71
66
|
|
|
72
|
-
|
|
73
|
-
<br/>
|
|
67
|
+
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input data to a reference database. Bloom filters ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
74
68
|
|
|
75
|
-
The tool is available as a web-based application and a
|
|
69
|
+
The tool is available as a web-based application and as a command line interface.
|
|
76
70
|
|
|
77
|
-
[
|
|
71
|
+
[kmer indices]: https://arxiv.org/abs/1905.09624
|
|
78
72
|
[Support Vector Machine]: https://en.wikipedia.org/wiki/Support-vector_machine
|
|
79
|
-
[blaOxa-genes]: https://en.wikipedia.org/wiki/Beta-lactamase#OXA_beta-lactamases_(class_D)
|
|
80
73
|
<!-- end intro -->
|
|
81
74
|
|
|
82
75
|
<!-- start quickstart -->
|
|
83
76
|
## Installation
|
|
84
|
-
To install
|
|
77
|
+
To install XspecT, please download the latest 64-bit Python version and install the package using pip:
|
|
85
78
|
```
|
|
86
79
|
pip install xspect
|
|
87
80
|
```
|
|
@@ -91,23 +84,23 @@ Please note that Windows and Alpine Linux is currently not supported.
|
|
|
91
84
|
### Get the models
|
|
92
85
|
To download basic pre-trained models, you can use the built-in command:
|
|
93
86
|
```
|
|
94
|
-
xspect download
|
|
87
|
+
xspect models download
|
|
95
88
|
```
|
|
96
89
|
Additional species models can be trained using:
|
|
97
90
|
```
|
|
98
|
-
xspect train
|
|
91
|
+
xspect models train ncbi
|
|
99
92
|
```
|
|
100
93
|
|
|
101
94
|
### How to run the web app
|
|
102
95
|
To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
|
|
103
96
|
```
|
|
104
|
-
xspect
|
|
97
|
+
xspect web
|
|
105
98
|
```
|
|
106
99
|
|
|
107
100
|
### How to use the XspecT command line interface
|
|
108
|
-
Run
|
|
101
|
+
Run XspecT with the configuration you want to run it with as arguments.
|
|
109
102
|
```
|
|
110
|
-
xspect classify
|
|
103
|
+
xspect classify species
|
|
111
104
|
```
|
|
112
105
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
113
106
|
```
|
|
@@ -56,8 +56,7 @@ src/xspect/fastapi.py
|
|
|
56
56
|
src/xspect/file_io.py
|
|
57
57
|
src/xspect/main.py
|
|
58
58
|
src/xspect/model_management.py
|
|
59
|
-
src/xspect/
|
|
60
|
-
src/xspect/run.py
|
|
59
|
+
src/xspect/ncbi.py
|
|
61
60
|
src/xspect/train.py
|
|
62
61
|
src/xspect/mlst_feature/__init__.py
|
|
63
62
|
src/xspect/mlst_feature/mlst_helper.py
|
|
@@ -68,21 +67,13 @@ src/xspect/models/probabilistic_filter_model.py
|
|
|
68
67
|
src/xspect/models/probabilistic_filter_svm_model.py
|
|
69
68
|
src/xspect/models/probabilistic_single_filter_model.py
|
|
70
69
|
src/xspect/models/result.py
|
|
71
|
-
src/xspect/train_filter/__init__.py
|
|
72
|
-
src/xspect/train_filter/create_svm.py
|
|
73
|
-
src/xspect/train_filter/extract_and_concatenate.py
|
|
74
|
-
src/xspect/train_filter/ncbi_api/__init__.py
|
|
75
|
-
src/xspect/train_filter/ncbi_api/download_assemblies.py
|
|
76
|
-
src/xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py
|
|
77
|
-
src/xspect/train_filter/ncbi_api/ncbi_children_tree.py
|
|
78
|
-
src/xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py
|
|
79
70
|
tests/__init__.py
|
|
80
71
|
tests/conftest.py
|
|
81
72
|
tests/test_cli.py
|
|
82
73
|
tests/test_file_io.py
|
|
83
74
|
tests/test_model_management.py
|
|
84
75
|
tests/test_model_result.py
|
|
85
|
-
tests/
|
|
76
|
+
tests/test_ncbi.py
|
|
86
77
|
tests/test_probabilisitc_filter_mlst_model.py
|
|
87
78
|
tests/test_probabilistic_filter_model.py
|
|
88
79
|
tests/test_probabilistic_filter_svm_model.py
|
|
@@ -21,13 +21,6 @@ def get_xspect_model_path():
|
|
|
21
21
|
return model_path
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
def get_xspect_tmp_path():
|
|
25
|
-
"""Return the path to the XspecT temporary files."""
|
|
26
|
-
tmp_path = get_xspect_root_path() / "tmp"
|
|
27
|
-
tmp_path.mkdir(exist_ok=True, parents=True)
|
|
28
|
-
return tmp_path
|
|
29
|
-
|
|
30
|
-
|
|
31
24
|
def get_xspect_upload_path():
|
|
32
25
|
"""Return the path to the XspecT upload directory."""
|
|
33
26
|
upload_path = get_xspect_root_path() / "uploads"
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Download filters from public repository."""
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from tempfile import TemporaryDirectory
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from xspect.definitions import get_xspect_model_path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def download_test_models(url):
    """Download a zipped model bundle and install it into the model directory.

    The archive is fetched into a temporary directory, unpacked there and then
    merged into the directory returned by ``get_xspect_model_path()``. Files
    that already exist at the destination are overwritten.

    Args:
        url (str): Address of the zip archive that contains the models.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
    """
    with TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)
        download_path = tmp_dir / "models.zip"
        extract_path = tmp_dir / "extracted_models"

        r = requests.get(url, allow_redirects=True, timeout=10)
        # Fail on 4xx/5xx responses instead of saving the error page as
        # "models.zip" and failing later with a confusing unpack error.
        r.raise_for_status()
        with open(download_path, "wb") as f:
            f.write(r.content)

        shutil.unpack_archive(
            download_path,
            extract_path,
            "zip",
        )

        shutil.copytree(
            extract_path,
            get_xspect_model_path(),
            dirs_exist_ok=True,
        )
        # No explicit cleanup of extract_path: leaving the ``with`` block
        # removes the whole temporary directory, including the extracted files.
|
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
"""FastAPI application for XspecT."""
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
from uuid import uuid4
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from shutil import copyfileobj
|
|
6
6
|
from fastapi import FastAPI, UploadFile, BackgroundTasks
|
|
7
7
|
from xspect.definitions import get_xspect_runs_path, get_xspect_upload_path
|
|
8
8
|
from xspect.download_models import download_test_models
|
|
9
|
+
from xspect.file_io import filter_sequences
|
|
9
10
|
import xspect.model_management as mm
|
|
10
|
-
from xspect.
|
|
11
|
-
from xspect.pipeline import ModelExecution, Pipeline, PipelineStep
|
|
12
|
-
from xspect.train import train_ncbi
|
|
11
|
+
from xspect.train import train_from_ncbi
|
|
13
12
|
|
|
14
13
|
app = FastAPI()
|
|
15
14
|
|
|
@@ -17,43 +16,41 @@ app = FastAPI()
|
|
|
17
16
|
@app.get("/download-filters")
|
|
18
17
|
def download_filters():
|
|
19
18
|
"""Download filters."""
|
|
20
|
-
download_test_models("
|
|
19
|
+
download_test_models("http://assets.adrianromberg.com/xspect-models.zip")
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
@app.get("/classify")
|
|
24
23
|
def classify(genus: str, file: str, meta: bool = False, step: int = 500):
|
|
25
24
|
"""Classify uploaded sample."""
|
|
26
25
|
|
|
27
|
-
|
|
26
|
+
input_path = get_xspect_upload_path() / file
|
|
27
|
+
|
|
28
|
+
uuid = str(uuid4())
|
|
28
29
|
|
|
29
|
-
pipeline = Pipeline(genus + " classification", "Test Author", "test@example.com")
|
|
30
|
-
species_execution = ModelExecution(
|
|
31
|
-
genus.lower() + "-species", sparse_sampling_step=step
|
|
32
|
-
)
|
|
33
30
|
if meta:
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
|
|
31
|
+
genus_model = mm.get_genus_model(genus)
|
|
32
|
+
genus_result = genus_model.predict(input_path, step=step)
|
|
33
|
+
included_ids = genus_result.get_filtered_subsequence_labels(genus)
|
|
34
|
+
if not included_ids:
|
|
35
|
+
return {"message": "No sequences found for the given genus."}
|
|
36
|
+
filtered_path = get_xspect_runs_path() / f"filtered_{uuid}.fasta"
|
|
37
|
+
filter_sequences(
|
|
38
|
+
Path(input_path),
|
|
39
|
+
Path(filtered_path),
|
|
40
|
+
included_ids=included_ids,
|
|
39
41
|
)
|
|
40
|
-
|
|
41
|
-
pipeline.add_pipeline_step(genus_execution)
|
|
42
|
-
else:
|
|
43
|
-
pipeline.add_pipeline_step(species_execution)
|
|
44
|
-
|
|
45
|
-
run = pipeline.run(Path(path))
|
|
46
|
-
time_str = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
|
|
47
|
-
save_path = get_xspect_runs_path() / f"run_{time_str}.json"
|
|
48
|
-
run.save(save_path)
|
|
42
|
+
input_path = filtered_path
|
|
49
43
|
|
|
50
|
-
|
|
44
|
+
species_model = mm.get_species_model(genus)
|
|
45
|
+
species_result = species_model.predict(input_path, step=step)
|
|
46
|
+
species_result.save(get_xspect_runs_path() / f"result_{uuid}.json")
|
|
47
|
+
return species_result.to_dict()
|
|
51
48
|
|
|
52
49
|
|
|
53
50
|
@app.post("/train")
|
|
54
51
|
def train(genus: str, background_tasks: BackgroundTasks, svm_steps: int = 1):
|
|
55
52
|
"""Train NCBI model."""
|
|
56
|
-
background_tasks.add_task(
|
|
53
|
+
background_tasks.add_task(train_from_ncbi, genus, svm_steps)
|
|
57
54
|
|
|
58
55
|
return {"message": "Training started."}
|
|
59
56
|
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File IO module.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from json import loads
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import zipfile
|
|
9
|
+
from Bio import SeqIO
|
|
10
|
+
from xspect.definitions import fasta_endings, fastq_endings
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def delete_zip_files(dir_path):
    """Delete all zip files in the given directory.

    Only direct children of the directory are inspected; subdirectories are
    not searched. A file counts as a zip file when ``zipfile.is_zipfile``
    recognises its content, regardless of its name.

    Args:
        dir_path (Path | str): Directory whose zip files should be removed.
    """
    dir_path = Path(dir_path)
    # Materialise the listing first so that deleting entries cannot disturb
    # the directory iteration.
    for file_path in list(dir_path.iterdir()):
        # Check the full path: a bare file name would be resolved against the
        # current working directory, not against dir_path.
        if zipfile.is_zipfile(file_path):
            os.remove(file_path)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def extract_zip(zip_path: Path, unzipped_path: Path):
    """Unpack every zip archive found directly inside a directory.

    Each archive is extracted into its own subdirectory of ``unzipped_path``,
    named after the archive file with every ".zip" occurrence removed.

    Args:
        zip_path (Path): Directory that contains the zip archives.
        unzipped_path (Path): Directory that receives the extracted content;
            created (including parents) if it does not exist.
    """
    unzipped_path.mkdir(parents=True, exist_ok=True)

    for entry_name in os.listdir(zip_path):
        candidate = zip_path / entry_name
        # Entries that are not zip archives are ignored.
        if not zipfile.is_zipfile(candidate):
            continue
        target_dir = unzipped_path / entry_name.replace(".zip", "")
        with zipfile.ZipFile(candidate) as archive:
            archive.extractall(target_dir)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def concatenate_meta(path: Path, genus: str):
    """Merge all species fasta files into a single-record genus fasta file.

    The files are read from ``path / "concatenate"``. The result is written to
    ``path / "<genus>.fasta"`` and consists of one header line followed by the
    sequence lines of every input file joined without line breaks; header
    lines of the input files are dropped.

    :param path: Directory that contains the "concatenate" subdirectory and
        receives the output file.
    :type path: Path
    :param genus: Genus name, used for the output file name and its header.
    :type genus: str
    """
    source_dir = path / "concatenate"
    target_file = path / (genus + ".fasta")
    entries = os.listdir(source_dir)

    with open(target_file, "w", encoding="utf-8") as out_handle:
        # Single header for the whole merged record.
        out_handle.write(f">{genus} metagenome\n")

        for entry in entries:
            extension = str(entry).rsplit(".", maxsplit=1)[-1]
            if extension not in fasta_endings:
                continue
            with open(source_dir / str(entry), "r", encoding="utf-8") as in_handle:
                for raw_line in in_handle:
                    # Skip per-file headers; keep only sequence data.
                    if raw_line[0] == ">":
                        continue
                    out_handle.write(raw_line.replace("\n", ""))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_record_iterator(file_path: Path):
    """Return a Biopython record iterator for a fasta or fastq file.

    The format is chosen from the file suffix (without the leading dot) by
    looking it up in ``fasta_endings`` first and ``fastq_endings`` second.

    Args:
        file_path (Path): Path to the sequence file.

    Returns:
        The iterator produced by ``SeqIO.parse`` for the detected format.

    Raises:
        ValueError: If ``file_path`` is not a ``Path``, does not exist, is not
            a regular file, or has an unsupported suffix.
    """
    if not isinstance(file_path, Path):
        raise ValueError("Path must be a Path object")
    if not file_path.exists():
        raise ValueError("File does not exist")
    if not file_path.is_file():
        raise ValueError("Path must be a file")

    extension = file_path.suffix[1:]
    # Checked in order: fasta takes precedence over fastq.
    for known_endings, seq_format in (
        (fasta_endings, "fasta"),
        (fastq_endings, "fastq"),
    ):
        if extension in known_endings:
            return SeqIO.parse(file_path, seq_format)

    raise ValueError("Invalid file format, must be a fasta or fastq file")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_records_by_id(file: Path, ids: list[str]):
    """Return records with the specified ids.

    Args:
        file (Path): Path to a fasta or fastq file.
        ids (list[str]): Record ids to keep.

    Returns:
        list: The matching records, in the order they appear in the file.

    Raises:
        ValueError: Propagated from ``get_record_iterator`` when the file is
            missing, not a file, or not a supported format.
    """
    # Build the lookup set once so each record costs a constant-time check
    # instead of a scan over the whole id list.
    wanted_ids = set(ids)
    return [
        record for record in get_record_iterator(file) if record.id in wanted_ids
    ]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def concatenate_species_fasta_files(input_folders: list[Path], output_directory: Path):
    """Concatenate fasta files from different species into one file per species.

    For every species folder, all files whose extension is listed in
    ``fasta_endings`` are written one after another into
    ``output_directory / "<folder name>.fasta"``.

    Args:
        input_folders (list[Path]): List of paths to species folders.
        output_directory (Path): Path to the output directory.

    Raises:
        ValueError: If a species folder contains no fasta files.
    """
    for species_folder in input_folders:
        species_name = species_folder.name
        fasta_files = [
            f for ending in fasta_endings for f in species_folder.glob(f"*.{ending}")
        ]
        if not fasta_files:
            raise ValueError(f"no fasta files found in {species_folder}")

        # concatenate fasta files
        concatenated_fasta = output_directory / f"{species_name}.fasta"
        with open(concatenated_fasta, "w", encoding="utf-8") as f:
            for fasta_file in fasta_files:
                with open(fasta_file, "r", encoding="utf-8") as f_in:
                    content = f_in.read()
                f.write(content)
                # A file without a trailing newline would otherwise glue the
                # next file's ">" header onto its last sequence line.
                if content and not content.endswith("\n"):
                    f.write("\n")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def concatenate_metagenome(fasta_dir: Path, meta_path: Path):
    """Concatenate all fasta files in a directory into one file.

    Only files matching ``*.fasta`` directly inside ``fasta_dir`` are used.
    They are appended in sorted path order so the output is reproducible.

    Args:
        fasta_dir (Path): Path to the directory with the fasta files.
        meta_path (Path): Path to the output file.
    """
    output_resolved = Path(meta_path).resolve()
    with open(meta_path, "w", encoding="utf-8") as meta_file:
        for fasta_file in sorted(fasta_dir.glob("*.fasta")):
            # The output may live inside fasta_dir; never feed it back into
            # itself.
            if fasta_file.resolve() == output_resolved:
                continue
            with open(fasta_file, "r", encoding="utf-8") as f_in:
                content = f_in.read()
            meta_file.write(content)
            # A file without a trailing newline would otherwise glue the next
            # file's ">" header onto its last sequence line.
            if content and not content.endswith("\n"):
                meta_file.write("\n")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def get_ncbi_dataset_accession_paths(
    ncbi_dataset_path: Path,
) -> dict[str, Path]:
    """Map each accession of an NCBI dataset to the path of its first file.

    The mapping is read from ``ncbi_dataset/data/dataset_catalog.json`` below
    the given directory. The first catalog entry is skipped because it is the
    data report rather than an assembly.

    Args:
        ncbi_dataset_path (Path): Path to the NCBI dataset directory.

    Returns:
        dict[str, Path]: Dictionary with the accession as key and the path as value.

    Raises:
        ValueError: If the ``ncbi_dataset/data`` directory does not exist.
    """
    data_path = ncbi_dataset_path / "ncbi_dataset" / "data"
    if not data_path.exists():
        raise ValueError(f"Path {data_path} does not exist.")

    catalog_file = data_path / "dataset_catalog.json"
    with open(catalog_file, "r", encoding="utf-8") as handle:
        catalog = loads(handle.read())

    # File paths in the catalog are relative to the data directory.
    return {
        entry["accession"]: data_path / entry["files"][0]["filePath"]
        for entry in catalog["assemblies"][1:]
    }
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def filter_sequences(
    input_file: Path,
    output_file: Path,
    included_ids: list[str],
):
    """Filter sequences by IDs from an input file and save them to an output file.

    Matching records are always written in fasta format, whatever the format
    of the input file.

    Args:
        input_file (Path): Path to the input file.
        output_file (Path): Path to the output file.
        included_ids (list[str]): List of IDs to include. If empty or None, a
            notice is printed and no output file is created.

    Raises:
        ValueError: Propagated from ``get_record_iterator`` when the input is
            missing, not a file, or not a supported format.
    """
    if not included_ids:
        print("No IDs provided, no output file will be created.")
        return

    # Build the lookup set once so each record costs a constant-time check
    # instead of a scan over the whole id list.
    wanted_ids = set(included_ids)
    with open(output_file, "w", encoding="utf-8") as out_f:
        for record in get_record_iterator(input_file):
            if record.id in wanted_ids:
                SeqIO.write(record, out_f, "fasta")
|