XspecT 0.2.5__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic. Click here for more details.
- {xspect-0.2.5 → xspect-0.2.6}/.github/workflows/test.yml +2 -2
- {xspect-0.2.5/src/XspecT.egg-info → xspect-0.2.6}/PKG-INFO +9 -9
- {xspect-0.2.5 → xspect-0.2.6}/README.md +7 -7
- xspect-0.2.6/docs/cli.md +80 -0
- xspect-0.2.6/docs/input_data.md +2 -0
- {xspect-0.2.5 → xspect-0.2.6}/pyproject.toml +1 -1
- {xspect-0.2.5 → xspect-0.2.6/src/XspecT.egg-info}/PKG-INFO +9 -9
- {xspect-0.2.5 → xspect-0.2.6}/src/XspecT.egg-info/SOURCES.txt +7 -1
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/definitions.py +7 -0
- xspect-0.2.5/src/xspect/download_filters.py → xspect-0.2.6/src/xspect/download_models.py +2 -2
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/fastapi.py +2 -2
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/main.py +61 -8
- xspect-0.2.6/src/xspect/mlst_feature/mlst_helper.py +155 -0
- xspect-0.2.6/src/xspect/mlst_feature/pub_mlst_handler.py +119 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/model_management.py +3 -4
- xspect-0.2.6/src/xspect/models/probabilistic_filter_mlst_model.py +287 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/models/probabilistic_filter_model.py +2 -11
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/models/probabilistic_filter_svm_model.py +3 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/models/probabilistic_single_filter_model.py +4 -6
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/models/result.py +7 -6
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/train.py +1 -33
- xspect-0.2.6/tests/__init__.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_cli.py +3 -3
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_file_io.py +1 -1
- xspect-0.2.6/tests/test_probabilisitc_filter_mlst_model.py +121 -0
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_probabilistic_filter_model.py +1 -1
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_probabilistic_filter_svm_model.py +1 -1
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_probabilistic_single_filter_model.py +1 -1
- xspect-0.2.6/tests/test_pub_mlst_handler.py +34 -0
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_train.py +1 -1
- xspect-0.2.5/docs/cli.md +0 -114
- xspect-0.2.5/docs/input_data.md +0 -4
- {xspect-0.2.5 → xspect-0.2.6}/.github/workflows/black.yml +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/.github/workflows/docs.yml +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/.github/workflows/pylint.yml +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/.github/workflows/pypi.yml +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/.gitignore +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/LICENSE +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/About.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/AddFilter.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/AddSpecies1.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/AddSpecies2.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/BF.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/ClAssT_Ergebnis1.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/ClAssT_Ergebnis2.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/ClAssT_Ergebnis3.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/ClAssT_Hauptseite.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/CommandLine_Input.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/CommandLine_results.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/CommandLine_whole.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/How2Use.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/HowtouseAspecT.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/XspecT_Ergebnis1.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/XspecT_Ergebnis2.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/XspecT_Ergebnis3.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/XspecT_Ergebnis4.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/XspecT_Hauptseite.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/XspecT_Runtime.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/XspecT_Runtime_Oxa.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/XspecT_Startseite.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/change_pw.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/modify_vecs.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Instructions/pictures/secretkey.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/Makefile +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/conf.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/diagrams/probabilistic_filter_models.md +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/img/logo.png +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/index.md +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/installation.md +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/make.bat +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/quickstart.md +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/docs/web.md +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/setup.cfg +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/XspecT.egg-info/dependency_links.txt +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/XspecT.egg-info/entry_points.txt +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/XspecT.egg-info/requires.txt +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/XspecT.egg-info/top_level.txt +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/__init__.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/file_io.py +0 -0
- {xspect-0.2.5/src/xspect/models → xspect-0.2.6/src/xspect/mlst_feature}/__init__.py +0 -0
- {xspect-0.2.5/src/xspect/train_filter → xspect-0.2.6/src/xspect/models}/__init__.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/pipeline.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/run.py +0 -0
- {xspect-0.2.5/src/xspect/train_filter/ncbi_api → xspect-0.2.6/src/xspect/train_filter}/__init__.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/train_filter/create_svm.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/train_filter/extract_and_concatenate.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/train_filter/html_scrap.py +0 -0
- {xspect-0.2.5/tests → xspect-0.2.6/src/xspect/train_filter/ncbi_api}/__init__.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/train_filter/ncbi_api/download_assemblies.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/train_filter/ncbi_api/ncbi_children_tree.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/src/xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/tests/conftest.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_model_management.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_model_result.py +0 -0
- {xspect-0.2.5 → xspect-0.2.6}/tests/test_pipeline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -63,7 +63,7 @@ Requires-Dist: pytest-cov; extra == "test"
|
|
|
63
63
|
<img src="/docs/img/logo.png" height="50%" width="50%">
|
|
64
64
|
|
|
65
65
|
<!-- start intro -->
|
|
66
|
-
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or
|
|
66
|
+
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
|
|
67
67
|
<br/><br/>
|
|
68
68
|
|
|
69
69
|
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filters ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
@@ -88,14 +88,14 @@ pip install xspect
|
|
|
88
88
|
Please note that Windows and Alpine Linux are currently not supported.
|
|
89
89
|
|
|
90
90
|
## Usage
|
|
91
|
-
### Get the
|
|
92
|
-
To download basic pre-trained
|
|
91
|
+
### Get the models
|
|
92
|
+
To download basic pre-trained models, you can use the built-in command:
|
|
93
93
|
```
|
|
94
|
-
xspect download-
|
|
94
|
+
xspect download-models
|
|
95
95
|
```
|
|
96
|
-
Additional species
|
|
96
|
+
Additional species models can be trained using:
|
|
97
97
|
```
|
|
98
|
-
xspect train you-ncbi-genus-name
|
|
98
|
+
xspect train-species you-ncbi-genus-name
|
|
99
99
|
```
|
|
100
100
|
|
|
101
101
|
### How to run the web app
|
|
@@ -107,7 +107,7 @@ xspect api
|
|
|
107
107
|
### How to use the XspecT command line interface
|
|
108
108
|
Run xspect with the configuration you want to run it with as arguments.
|
|
109
109
|
```
|
|
110
|
-
xspect classify your-genus path/to/your/input-set
|
|
110
|
+
xspect classify-species your-genus path/to/your/input-set
|
|
111
111
|
```
|
|
112
112
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
113
113
|
```
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
<img src="/docs/img/logo.png" height="50%" width="50%">
|
|
7
7
|
|
|
8
8
|
<!-- start intro -->
|
|
9
|
-
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or
|
|
9
|
+
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
|
|
10
10
|
<br/><br/>
|
|
11
11
|
|
|
12
12
|
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filters ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
@@ -31,14 +31,14 @@ pip install xspect
|
|
|
31
31
|
Please note that Windows and Alpine Linux are currently not supported.
|
|
32
32
|
|
|
33
33
|
## Usage
|
|
34
|
-
### Get the
|
|
35
|
-
To download basic pre-trained
|
|
34
|
+
### Get the models
|
|
35
|
+
To download basic pre-trained models, you can use the built-in command:
|
|
36
36
|
```
|
|
37
|
-
xspect download-
|
|
37
|
+
xspect download-models
|
|
38
38
|
```
|
|
39
|
-
Additional species
|
|
39
|
+
Additional species models can be trained using:
|
|
40
40
|
```
|
|
41
|
-
xspect train you-ncbi-genus-name
|
|
41
|
+
xspect train-species you-ncbi-genus-name
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
### How to run the web app
|
|
@@ -50,7 +50,7 @@ xspect api
|
|
|
50
50
|
### How to use the XspecT command line interface
|
|
51
51
|
Run xspect with the configuration you want to run it with as arguments.
|
|
52
52
|
```
|
|
53
|
-
xspect classify your-genus path/to/your/input-set
|
|
53
|
+
xspect classify-species your-genus path/to/your/input-set
|
|
54
54
|
```
|
|
55
55
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
56
56
|
```
|
xspect-0.2.6/docs/cli.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# How to use the CLI
|
|
2
|
+
|
|
3
|
+
XspecT comes with a built-in command line interface (CLI), which enables quick classifications without the need to use the web interface. The command line interface can also be used to download and train models.
|
|
4
|
+
|
|
5
|
+
After installing XspecT, a list of available commands can be viewed by running:
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
xspect --help
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Model downloads
|
|
12
|
+
|
|
13
|
+
A basic set of pre-trained models (Acinetobacter and Salmonella) can be downloaded using the following command:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
xspect download-models
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
For the moment, it is not possible to specify exactly which models should be downloaded.
|
|
20
|
+
|
|
21
|
+
## Classification
|
|
22
|
+
|
|
23
|
+
To classify samples, the command
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
xspect classify-species GENUS PATH
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
can be used, where `GENUS` refers to the NCBI genus name of your sample and `PATH` refers to the path to your sample *directory*. This command will classify the species of your sample within the given genus.
|
|
30
|
+
|
|
31
|
+
The following options are available:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
-m, --meta / --no-meta Metagenome classification.
|
|
35
|
+
-s, --step INTEGER Sparse sampling step size (e. g. only every 500th
|
|
36
|
+
kmer for step=500).
|
|
37
|
+
--help Show this message and exit.
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
To speed up the analysis, only every nth kmer can be considered ("sparse sampling"). For example, to only consider every 10th kmer, run:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
xspect classify-species -s 10 Acinetobacter path
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Metagenome Mode
|
|
47
|
+
|
|
48
|
+
To analyze a sample in metagenome mode, the `-m`/`--meta` (`--no-meta`) option can be used:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
xspect classify-species -m Acinetobacter path
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Compared to normal XspecT species classification, this mode first identifies reads belonging to the given genus and continues classification only with the resulting reads. It is thus more suitable for metagenomic samples, as the resulting runtime is decreased.
|
|
55
|
+
|
|
56
|
+
### MLST Classification
|
|
57
|
+
|
|
58
|
+
Samples can also be classified based on multi-locus sequence typing (MLST) schemes. To MLST-classify a sample, run:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
xspect classify-mlst -p path
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Model Training
|
|
65
|
+
|
|
66
|
+
Models can be trained based on data from NCBI, which is automatically downloaded and processed by XspecT.
|
|
67
|
+
|
|
68
|
+
To train a model, run the following command:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
xspect train-species your-ncbi-genus
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
`your-ncbi-genus` can be a genus name from NCBI or an NCBI taxonomy ID.
|
|
75
|
+
|
|
76
|
+
To train models for MLST classifications, run:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
xspect train-mlst
|
|
80
|
+
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -63,7 +63,7 @@ Requires-Dist: pytest-cov; extra == "test"
|
|
|
63
63
|
<img src="/docs/img/logo.png" height="50%" width="50%">
|
|
64
64
|
|
|
65
65
|
<!-- start intro -->
|
|
66
|
-
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or
|
|
66
|
+
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
|
|
67
67
|
<br/><br/>
|
|
68
68
|
|
|
69
69
|
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filters ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
@@ -88,14 +88,14 @@ pip install xspect
|
|
|
88
88
|
Please note that Windows and Alpine Linux are currently not supported.
|
|
89
89
|
|
|
90
90
|
## Usage
|
|
91
|
-
### Get the
|
|
92
|
-
To download basic pre-trained
|
|
91
|
+
### Get the models
|
|
92
|
+
To download basic pre-trained models, you can use the built-in command:
|
|
93
93
|
```
|
|
94
|
-
xspect download-
|
|
94
|
+
xspect download-models
|
|
95
95
|
```
|
|
96
|
-
Additional species
|
|
96
|
+
Additional species models can be trained using:
|
|
97
97
|
```
|
|
98
|
-
xspect train you-ncbi-genus-name
|
|
98
|
+
xspect train-species you-ncbi-genus-name
|
|
99
99
|
```
|
|
100
100
|
|
|
101
101
|
### How to run the web app
|
|
@@ -107,7 +107,7 @@ xspect api
|
|
|
107
107
|
### How to use the XspecT command line interface
|
|
108
108
|
Run xspect with the configuration you want to run it with as arguments.
|
|
109
109
|
```
|
|
110
|
-
xspect classify your-genus path/to/your/input-set
|
|
110
|
+
xspect classify-species your-genus path/to/your/input-set
|
|
111
111
|
```
|
|
112
112
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
113
113
|
```
|
|
@@ -51,7 +51,7 @@ src/XspecT.egg-info/requires.txt
|
|
|
51
51
|
src/XspecT.egg-info/top_level.txt
|
|
52
52
|
src/xspect/__init__.py
|
|
53
53
|
src/xspect/definitions.py
|
|
54
|
-
src/xspect/
|
|
54
|
+
src/xspect/download_models.py
|
|
55
55
|
src/xspect/fastapi.py
|
|
56
56
|
src/xspect/file_io.py
|
|
57
57
|
src/xspect/main.py
|
|
@@ -59,7 +59,11 @@ src/xspect/model_management.py
|
|
|
59
59
|
src/xspect/pipeline.py
|
|
60
60
|
src/xspect/run.py
|
|
61
61
|
src/xspect/train.py
|
|
62
|
+
src/xspect/mlst_feature/__init__.py
|
|
63
|
+
src/xspect/mlst_feature/mlst_helper.py
|
|
64
|
+
src/xspect/mlst_feature/pub_mlst_handler.py
|
|
62
65
|
src/xspect/models/__init__.py
|
|
66
|
+
src/xspect/models/probabilistic_filter_mlst_model.py
|
|
63
67
|
src/xspect/models/probabilistic_filter_model.py
|
|
64
68
|
src/xspect/models/probabilistic_filter_svm_model.py
|
|
65
69
|
src/xspect/models/probabilistic_single_filter_model.py
|
|
@@ -80,7 +84,9 @@ tests/test_file_io.py
|
|
|
80
84
|
tests/test_model_management.py
|
|
81
85
|
tests/test_model_result.py
|
|
82
86
|
tests/test_pipeline.py
|
|
87
|
+
tests/test_probabilisitc_filter_mlst_model.py
|
|
83
88
|
tests/test_probabilistic_filter_model.py
|
|
84
89
|
tests/test_probabilistic_filter_svm_model.py
|
|
85
90
|
tests/test_probabilistic_single_filter_model.py
|
|
91
|
+
tests/test_pub_mlst_handler.py
|
|
86
92
|
tests/test_train.py
|
|
@@ -40,3 +40,10 @@ def get_xspect_runs_path():
|
|
|
40
40
|
runs_path = get_xspect_root_path() / "runs"
|
|
41
41
|
runs_path.mkdir(exist_ok=True, parents=True)
|
|
42
42
|
return runs_path
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_xspect_mlst_path():
|
|
46
|
+
"""Return the path to the XspecT runs directory."""
|
|
47
|
+
mlst_path = get_xspect_root_path() / "mlst"
|
|
48
|
+
mlst_path.mkdir(exist_ok=True, parents=True)
|
|
49
|
+
return mlst_path
|
|
@@ -7,8 +7,8 @@ import requests
|
|
|
7
7
|
from xspect.definitions import get_xspect_model_path, get_xspect_tmp_path
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def
|
|
11
|
-
"""Download
|
|
10
|
+
def download_test_models(url):
|
|
11
|
+
"""Download models."""
|
|
12
12
|
|
|
13
13
|
download_path = get_xspect_tmp_path() / "models.zip"
|
|
14
14
|
extract_path = get_xspect_tmp_path() / "extracted_models"
|
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
from shutil import copyfileobj
|
|
6
6
|
from fastapi import FastAPI, UploadFile, BackgroundTasks
|
|
7
7
|
from xspect.definitions import get_xspect_runs_path, get_xspect_upload_path
|
|
8
|
-
from xspect.
|
|
8
|
+
from xspect.download_models import download_test_models
|
|
9
9
|
import xspect.model_management as mm
|
|
10
10
|
from xspect.models.result import StepType
|
|
11
11
|
from xspect.pipeline import ModelExecution, Pipeline, PipelineStep
|
|
@@ -17,7 +17,7 @@ app = FastAPI()
|
|
|
17
17
|
@app.get("/download-filters")
|
|
18
18
|
def download_filters():
|
|
19
19
|
"""Download filters."""
|
|
20
|
-
|
|
20
|
+
download_test_models("https://xspect2.s3.eu-central-1.amazonaws.com/models.zip")
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
@app.get("/classify")
|
|
@@ -6,13 +6,23 @@ import uuid
|
|
|
6
6
|
import click
|
|
7
7
|
import uvicorn
|
|
8
8
|
from xspect import fastapi
|
|
9
|
-
from xspect.
|
|
9
|
+
from xspect.download_models import download_test_models
|
|
10
10
|
from xspect.train import train_ncbi
|
|
11
11
|
from xspect.models.result import (
|
|
12
12
|
StepType,
|
|
13
13
|
)
|
|
14
|
-
from xspect.definitions import
|
|
14
|
+
from xspect.definitions import (
|
|
15
|
+
get_xspect_runs_path,
|
|
16
|
+
fasta_endings,
|
|
17
|
+
fastq_endings,
|
|
18
|
+
get_xspect_model_path,
|
|
19
|
+
)
|
|
15
20
|
from xspect.pipeline import ModelExecution, Pipeline, PipelineStep
|
|
21
|
+
from xspect.mlst_feature.mlst_helper import pick_scheme, pick_scheme_from_models_dir
|
|
22
|
+
from xspect.mlst_feature.pub_mlst_handler import PubMLSTHandler
|
|
23
|
+
from xspect.models.probabilistic_filter_mlst_model import (
|
|
24
|
+
ProbabilisticFilterMlstSchemeModel,
|
|
25
|
+
)
|
|
16
26
|
|
|
17
27
|
|
|
18
28
|
@click.group()
|
|
@@ -22,10 +32,10 @@ def cli():
|
|
|
22
32
|
|
|
23
33
|
|
|
24
34
|
@cli.command()
|
|
25
|
-
def
|
|
26
|
-
"""Download
|
|
27
|
-
click.echo("Downloading
|
|
28
|
-
|
|
35
|
+
def download_models():
|
|
36
|
+
"""Download models."""
|
|
37
|
+
click.echo("Downloading models, this may take a while...")
|
|
38
|
+
download_test_models("https://xspect2.s3.eu-central-1.amazonaws.com/models.zip")
|
|
29
39
|
|
|
30
40
|
|
|
31
41
|
@cli.command()
|
|
@@ -43,7 +53,7 @@ def download_filters():
|
|
|
43
53
|
help="Sparse sampling step size (e. g. only every 500th kmer for step=500).",
|
|
44
54
|
default=1,
|
|
45
55
|
)
|
|
46
|
-
def
|
|
56
|
+
def classify_species(genus, path, meta, step):
|
|
47
57
|
"""Classify sample(s) from file or directory PATH."""
|
|
48
58
|
click.echo("Classifying...")
|
|
49
59
|
click.echo(f"Step: {step}")
|
|
@@ -105,7 +115,7 @@ def classify(genus, path, meta, step):
|
|
|
105
115
|
help="SVM Sparse sampling step size (e. g. only every 500th kmer for step=500).",
|
|
106
116
|
default=1,
|
|
107
117
|
)
|
|
108
|
-
def
|
|
118
|
+
def train_species(genus, bf_assembly_path, svm_assembly_path, svm_step):
|
|
109
119
|
"""Train model."""
|
|
110
120
|
|
|
111
121
|
if bf_assembly_path or svm_assembly_path:
|
|
@@ -118,6 +128,49 @@ def train(genus, bf_assembly_path, svm_assembly_path, svm_step):
|
|
|
118
128
|
raise click.ClickException(str(e)) from e
|
|
119
129
|
|
|
120
130
|
|
|
131
|
+
@cli.command()
|
|
132
|
+
@click.option(
|
|
133
|
+
"-c",
|
|
134
|
+
"--choose_schemes",
|
|
135
|
+
is_flag=True,
|
|
136
|
+
help="Choose your own schemes."
|
|
137
|
+
"Default setting is Oxford and Pasteur scheme of A.baumannii.",
|
|
138
|
+
)
|
|
139
|
+
def train_mlst(choose_schemes):
|
|
140
|
+
"""Download alleles and train bloom filters."""
|
|
141
|
+
click.echo("Updating alleles")
|
|
142
|
+
handler = PubMLSTHandler()
|
|
143
|
+
handler.download_alleles(choose_schemes)
|
|
144
|
+
click.echo("Download finished")
|
|
145
|
+
scheme_path = pick_scheme(handler.get_scheme_paths())
|
|
146
|
+
species_name = str(scheme_path).split("/")[-2]
|
|
147
|
+
scheme_name = str(scheme_path).split("/")[-1]
|
|
148
|
+
model = ProbabilisticFilterMlstSchemeModel(
|
|
149
|
+
31, f"{species_name}:{scheme_name}", get_xspect_model_path()
|
|
150
|
+
)
|
|
151
|
+
click.echo("Creating mlst model")
|
|
152
|
+
model.fit(scheme_path)
|
|
153
|
+
model.save()
|
|
154
|
+
click.echo(f"Saved at {model.cobs_path}")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@cli.command()
|
|
158
|
+
@click.option(
|
|
159
|
+
"-p",
|
|
160
|
+
"--path",
|
|
161
|
+
help="Path to FASTA-file for mlst identification.",
|
|
162
|
+
type=click.Path(exists=True, dir_okay=True, file_okay=True),
|
|
163
|
+
)
|
|
164
|
+
def classify_mlst(path):
|
|
165
|
+
"""MLST classify a sample."""
|
|
166
|
+
click.echo("Classifying...")
|
|
167
|
+
path = Path(path)
|
|
168
|
+
scheme_path = pick_scheme_from_models_dir()
|
|
169
|
+
model = ProbabilisticFilterMlstSchemeModel.load(scheme_path)
|
|
170
|
+
model.predict(scheme_path, path).save(model.model_display_name, path)
|
|
171
|
+
click.echo(f"Run saved at {get_xspect_runs_path()}.")
|
|
172
|
+
|
|
173
|
+
|
|
121
174
|
@cli.command()
|
|
122
175
|
def api():
|
|
123
176
|
"""Open the XspecT FastAPI."""
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
""" Module for utility functions used in other modules regarding MLST. """
|
|
2
|
+
|
|
3
|
+
__author__ = "Cetin, Oemer"
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
import json
|
|
7
|
+
from io import StringIO
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from Bio import SeqIO
|
|
10
|
+
from xspect.definitions import get_xspect_model_path, get_xspect_runs_path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def create_fasta_files(locus_path: Path, fasta_batch: str):
|
|
14
|
+
"""Create Fasta-Files for every allele of a locus."""
|
|
15
|
+
# fasta_batch = full string of a fasta file containing every allele sequence of a locus
|
|
16
|
+
for record in SeqIO.parse(StringIO(fasta_batch), "fasta"):
|
|
17
|
+
number = record.id.split("_")[-1] # example id = Oxf_cpn60_263
|
|
18
|
+
output_fasta_file = locus_path / f"Allele_ID_{number}.fasta"
|
|
19
|
+
if output_fasta_file.exists():
|
|
20
|
+
continue # Ignore existing ones
|
|
21
|
+
with open(output_fasta_file, "w") as allele:
|
|
22
|
+
SeqIO.write(record, allele, "fasta")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def pick_species_number_from_db(available_species: dict) -> str:
|
|
26
|
+
"""Returns the chosen species from all available ones in the database."""
|
|
27
|
+
# The "database" string can look like this: pubmlst_abaumannii_seqdef
|
|
28
|
+
for counter, database in available_species.items():
|
|
29
|
+
print(str(counter) + ":" + database.split("_")[1])
|
|
30
|
+
print("\nPick one of the above databases")
|
|
31
|
+
while True:
|
|
32
|
+
try:
|
|
33
|
+
choice = input("Choose a species by selecting the corresponding number:")
|
|
34
|
+
if int(choice) in available_species.keys():
|
|
35
|
+
chosen_species = available_species.get(int(choice))
|
|
36
|
+
return chosen_species
|
|
37
|
+
else:
|
|
38
|
+
print(
|
|
39
|
+
"Wrong input! Try again with a number that is available in the list above."
|
|
40
|
+
)
|
|
41
|
+
except ValueError:
|
|
42
|
+
print(
|
|
43
|
+
"Wrong input! Try again with a number that is available in the list above."
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def pick_scheme_number_from_db(available_schemes: dict) -> str:
|
|
48
|
+
"""Returns the chosen schemes from all available ones of a species."""
|
|
49
|
+
# List all available schemes of a species database
|
|
50
|
+
for counter, scheme in available_schemes.items():
|
|
51
|
+
print(str(counter) + ":" + scheme[0])
|
|
52
|
+
print("\nPick any available scheme that is listed for download")
|
|
53
|
+
while True:
|
|
54
|
+
try:
|
|
55
|
+
choice = input("Choose a scheme by selecting the corresponding number:")
|
|
56
|
+
if int(choice) in available_schemes.keys():
|
|
57
|
+
chosen_scheme = available_schemes.get(int(choice))[1]
|
|
58
|
+
return chosen_scheme
|
|
59
|
+
else:
|
|
60
|
+
print(
|
|
61
|
+
"Wrong input! Try again with a number that is available in the above list."
|
|
62
|
+
)
|
|
63
|
+
except ValueError:
|
|
64
|
+
print(
|
|
65
|
+
"Wrong input! Try again with a number that is available in the above list."
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def scheme_list_to_dict(scheme_list: list[str]):
|
|
70
|
+
"""Converts the scheme list attribute into a dictionary with a number as the key."""
|
|
71
|
+
return dict(zip(range(1, len(scheme_list) + 1), scheme_list))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def pick_scheme_from_models_dir() -> Path:
|
|
75
|
+
"""Returns the chosen scheme from models that have been fitted prior."""
|
|
76
|
+
schemes = {}
|
|
77
|
+
counter = 1
|
|
78
|
+
for entry in sorted((get_xspect_model_path() / "MLST").iterdir()):
|
|
79
|
+
schemes[counter] = entry
|
|
80
|
+
counter += 1
|
|
81
|
+
return pick_scheme(schemes)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def pick_scheme(available_schemes: dict) -> Path:
|
|
85
|
+
"""Returns the chosen scheme from the scheme list."""
|
|
86
|
+
if not available_schemes:
|
|
87
|
+
raise ValueError("No scheme has been chosen for download yet!")
|
|
88
|
+
|
|
89
|
+
if len(available_schemes.items()) == 1:
|
|
90
|
+
return next(iter(available_schemes.values()))
|
|
91
|
+
|
|
92
|
+
# List available schemes
|
|
93
|
+
for counter, scheme in available_schemes.items():
|
|
94
|
+
# For Strain Typing with an API-POST Request to the db
|
|
95
|
+
if str(scheme).startswith("http"):
|
|
96
|
+
scheme_json = requests.get(scheme).json()
|
|
97
|
+
print(str(counter) + ":" + scheme_json["description"])
|
|
98
|
+
|
|
99
|
+
# To pick a scheme after download for fitting
|
|
100
|
+
else:
|
|
101
|
+
print(str(counter) + ":" + str(scheme).split("/")[-1])
|
|
102
|
+
|
|
103
|
+
print("\nPick a scheme for strain type prediction")
|
|
104
|
+
while True:
|
|
105
|
+
try:
|
|
106
|
+
choice = input("Choose a scheme by selecting the corresponding number:")
|
|
107
|
+
if int(choice) in available_schemes.keys():
|
|
108
|
+
chosen_scheme = available_schemes.get(int(choice))
|
|
109
|
+
return chosen_scheme
|
|
110
|
+
else:
|
|
111
|
+
print(
|
|
112
|
+
"Wrong input! Try again with a number that is available in the above list."
|
|
113
|
+
)
|
|
114
|
+
except ValueError:
|
|
115
|
+
print(
|
|
116
|
+
"Wrong input! Try again with a number that is available in the above list."
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class MlstResult:
|
|
121
|
+
"""Class for storing mlst results."""
|
|
122
|
+
|
|
123
|
+
def __init__(
|
|
124
|
+
self,
|
|
125
|
+
scheme_model: str,
|
|
126
|
+
steps: int,
|
|
127
|
+
hits: dict[str, list[dict]],
|
|
128
|
+
):
|
|
129
|
+
self.scheme_model = scheme_model
|
|
130
|
+
self.steps = steps
|
|
131
|
+
self.hits = hits
|
|
132
|
+
|
|
133
|
+
def get_results(self) -> dict:
|
|
134
|
+
"""Stores the result of a prediction in a dictionary."""
|
|
135
|
+
results = {seq_id: result for seq_id, result in self.hits.items()}
|
|
136
|
+
return results
|
|
137
|
+
|
|
138
|
+
def to_dict(self) -> dict:
|
|
139
|
+
"""Converts all attributes into one dictionary."""
|
|
140
|
+
result = {
|
|
141
|
+
"Scheme": self.scheme_model,
|
|
142
|
+
"Steps": self.steps,
|
|
143
|
+
"Results": self.get_results(),
|
|
144
|
+
}
|
|
145
|
+
return result
|
|
146
|
+
|
|
147
|
+
def save(self, display: str, file_path: Path) -> None:
|
|
148
|
+
"""Saves the result inside the "runs" directory"""
|
|
149
|
+
file_name = str(file_path).split("/")[-1]
|
|
150
|
+
json_path = get_xspect_runs_path() / "MLST" / f"{file_name}-{display}.json"
|
|
151
|
+
json_path.parent.mkdir(exist_ok=True, parents=True)
|
|
152
|
+
json_object = json.dumps(self.to_dict(), indent=4)
|
|
153
|
+
|
|
154
|
+
with open(json_path, "w", encoding="utf-8") as file:
|
|
155
|
+
file.write(json_object)
|