XspecT 0.5.0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic. Click here for more details.
- {xspect-0.5.0 → xspect-0.5.2}/.github/workflows/test.yml +4 -1
- {xspect-0.5.0 → xspect-0.5.2}/PKG-INFO +11 -5
- {xspect-0.5.0 → xspect-0.5.2}/README.md +10 -4
- xspect-0.5.2/docs/contributing.md +95 -0
- xspect-0.5.2/docs/understanding.md +24 -0
- {xspect-0.5.0 → xspect-0.5.2}/pyproject.toml +1 -1
- {xspect-0.5.0 → xspect-0.5.2}/src/XspecT.egg-info/PKG-INFO +11 -5
- {xspect-0.5.0 → xspect-0.5.2}/src/XspecT.egg-info/SOURCES.txt +3 -2
- xspect-0.5.2/src/xspect/classify.py +80 -0
- xspect-0.5.2/src/xspect/definitions.py +90 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/download_models.py +10 -2
- xspect-0.5.2/src/xspect/file_io.py +232 -0
- xspect-0.5.2/src/xspect/filter_sequences.py +108 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/main.py +90 -39
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/mlst_feature/mlst_helper.py +3 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/mlst_feature/pub_mlst_handler.py +43 -1
- xspect-0.5.2/src/xspect/model_management.py +149 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/models/probabilistic_filter_mlst_model.py +75 -37
- xspect-0.5.2/src/xspect/models/probabilistic_filter_model.py +462 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/models/probabilistic_filter_svm_model.py +106 -13
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/models/probabilistic_single_filter_model.py +73 -9
- xspect-0.5.2/src/xspect/models/result.py +182 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/ncbi.py +48 -12
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/train.py +19 -11
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/web.py +68 -12
- xspect-0.5.2/src/xspect/xspect-web/dist/assets/index-Ceo58xui.css +1 -0
- xspect-0.5.0/src/xspect/xspect-web/dist/assets/index-CMG4V7fZ.js → xspect-0.5.2/src/xspect/xspect-web/dist/assets/index-Dt_UlbgE.js +82 -77
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/dist/index.html +2 -2
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/App.tsx +4 -2
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/api.tsx +23 -1
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/filter-form.tsx +16 -3
- xspect-0.5.2/src/xspect/xspect-web/src/components/filtering-result.tsx +65 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/result.tsx +2 -2
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/types.tsx +5 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_cli.py +5 -1
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_file_io.py +6 -10
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_ncbi.py +5 -1
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_probabilisitc_filter_mlst_model.py +49 -7
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_pub_mlst_handler.py +19 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_train.py +7 -2
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_web.py +65 -1
- xspect-0.5.0/docs/contributing.md +0 -3
- xspect-0.5.0/docs/understanding.md +0 -3
- xspect-0.5.0/src/xspect/classify.py +0 -32
- xspect-0.5.0/src/xspect/definitions.py +0 -42
- xspect-0.5.0/src/xspect/file_io.py +0 -165
- xspect-0.5.0/src/xspect/filter_sequences.py +0 -56
- xspect-0.5.0/src/xspect/model_management.py +0 -79
- xspect-0.5.0/src/xspect/models/probabilistic_filter_model.py +0 -280
- xspect-0.5.0/src/xspect/models/result.py +0 -115
- xspect-0.5.0/src/xspect/xspect-web/dist/assets/index-jIKg1HIy.css +0 -1
- {xspect-0.5.0 → xspect-0.5.2}/.github/workflows/black.yml +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/.github/workflows/docs.yml +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/.github/workflows/pylint.yml +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/.github/workflows/pypi.yml +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/.gitignore +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/LICENSE +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/docs/cli.md +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/docs/index.md +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/docs/quickstart.md +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/docs/web.md +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/mkdocs.yml +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/setup.cfg +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/XspecT.egg-info/dependency_links.txt +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/XspecT.egg-info/entry_points.txt +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/XspecT.egg-info/requires.txt +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/XspecT.egg-info/top_level.txt +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/__init__.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/mlst_feature/__init__.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/models/__init__.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/.gitignore +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/README.md +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/components.json +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/dist/vite.svg +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/eslint.config.js +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/index.html +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/package-lock.json +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/package.json +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/pnpm-lock.yaml +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/public/vite.svg +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/assets/react.svg +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/classification-form.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/classify.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/data-table.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/dropdown-checkboxes.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/dropdown-slider.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/filter.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/header.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/landing.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/models-details.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/models.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/result-chart.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/spinner.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/accordion.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/button.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/card.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/chart.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/command.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/dialog.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/dropdown-menu.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/file-upload.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/form.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/input.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/label.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/navigation-menu.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/popover.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/select.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/separator.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/slider.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/switch.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/table.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/components/ui/tabs.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/index.css +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/lib/utils.ts +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/main.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/utils.tsx +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/src/vite-env.d.ts +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/tsconfig.app.json +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/tsconfig.json +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/tsconfig.node.json +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/src/xspect/xspect-web/vite.config.ts +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/__init__.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/conftest.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_model_management.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_model_result.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_probabilistic_filter_model.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_probabilistic_filter_svm_model.py +0 -0
- {xspect-0.5.0 → xspect-0.5.2}/tests/test_probabilistic_single_filter_model.py +0 -0
|
@@ -24,10 +24,13 @@ jobs:
|
|
|
24
24
|
run: |
|
|
25
25
|
python -m pip install --upgrade pip
|
|
26
26
|
pip install '.[test]'
|
|
27
|
-
- name: Download models
|
|
27
|
+
- name: Download models and train MLST
|
|
28
28
|
run: |
|
|
29
29
|
xspect models download
|
|
30
|
+
yes 1 | xspect models train mlst
|
|
30
31
|
- name: Test with pytest
|
|
32
|
+
env:
|
|
33
|
+
NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }}
|
|
31
34
|
run: |
|
|
32
35
|
pytest --cov --retries 2 --retry-delay 5
|
|
33
36
|
- name: Upload coverage reports to Codecov
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -55,7 +55,7 @@ Requires-Dist: pytest-retry; extra == "test"
|
|
|
55
55
|
Requires-Dist: httpx; extra == "test"
|
|
56
56
|
Dynamic: license-file
|
|
57
57
|
|
|
58
|
-
# XspecT
|
|
58
|
+
# XspecT
|
|
59
59
|
<!-- start intro -->
|
|
60
60
|

|
|
61
61
|
[](https://github.com/pylint-dev/pylint)
|
|
@@ -63,7 +63,7 @@ Dynamic: license-file
|
|
|
63
63
|
|
|
64
64
|
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
|
|
65
65
|
|
|
66
|
-
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a
|
|
66
|
+
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a kmer index. Probabilistic data structures ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
67
67
|
|
|
68
68
|
The tool is available as a web-based application and as a command line interface.
|
|
69
69
|
|
|
@@ -91,16 +91,22 @@ xspect models train ncbi
|
|
|
91
91
|
```
|
|
92
92
|
|
|
93
93
|
### How to run the web app
|
|
94
|
-
To run the web app,
|
|
94
|
+
To run the web app, simply execute:
|
|
95
95
|
```
|
|
96
96
|
xspect web
|
|
97
97
|
```
|
|
98
98
|
|
|
99
|
+
This will start a local web server. You can access the web app by navigating to `http://localhost:8000` in your web browser.
|
|
100
|
+
|
|
99
101
|
### How to use the XspecT command line interface
|
|
100
|
-
|
|
102
|
+
To use the XspecT command line interface, execute `xspect` with the desired subcommand and parameters.
|
|
103
|
+
|
|
104
|
+
**Example**:
|
|
101
105
|
```
|
|
102
106
|
xspect classify species
|
|
103
107
|
```
|
|
108
|
+
|
|
109
|
+
If you do not provide the required parameters, the command line interface will prompt you for them.
|
|
104
110
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
105
111
|
```
|
|
106
112
|
xspect --help
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# XspecT
|
|
1
|
+
# XspecT
|
|
2
2
|
<!-- start intro -->
|
|
3
3
|

|
|
4
4
|
[](https://github.com/pylint-dev/pylint)
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
|
|
8
8
|
|
|
9
|
-
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a
|
|
9
|
+
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a kmer index. Probabilistic data structures ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
10
10
|
|
|
11
11
|
The tool is available as a web-based application and as a command line interface.
|
|
12
12
|
|
|
@@ -34,16 +34,22 @@ xspect models train ncbi
|
|
|
34
34
|
```
|
|
35
35
|
|
|
36
36
|
### How to run the web app
|
|
37
|
-
To run the web app,
|
|
37
|
+
To run the web app, simply execute:
|
|
38
38
|
```
|
|
39
39
|
xspect web
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
+
This will start a local web server. You can access the web app by navigating to `http://localhost:8000` in your web browser.
|
|
43
|
+
|
|
42
44
|
### How to use the XspecT command line interface
|
|
43
|
-
|
|
45
|
+
To use the XspecT command line interface, execute `xspect` with the desired subcommand and parameters.
|
|
46
|
+
|
|
47
|
+
**Example**:
|
|
44
48
|
```
|
|
45
49
|
xspect classify species
|
|
46
50
|
```
|
|
51
|
+
|
|
52
|
+
If you do not provide the required parameters, the command line interface will prompt you for them.
|
|
47
53
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
48
54
|
```
|
|
49
55
|
xspect --help
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Contributing to XspecT
|
|
2
|
+
|
|
3
|
+
## Introduction
|
|
4
|
+
Thank you for your interest in contributing to XspecT! This page provides guidelines for contributing to the project, including how to set up your own development environment, the XspecT architecture, CI/CD, and the process for submitting contributions.
|
|
5
|
+
|
|
6
|
+
When contributing to XspecT, please follow these steps to ensure a smooth process:
|
|
7
|
+
|
|
8
|
+
- **Read the documentation**: Familiarize yourself with the project by reading the [documentation](https://bionf.github.io/XspecT2/), including the [Understanding XspecT](understanding.md) page and the [architecture overview](#architecture-overview).
|
|
9
|
+
- **Follow the coding standards**: Adhere to the project's coding standards and best practices. This includes using consistent naming conventions and writing clear and concise code and documentation. Furthermore, please make sure your changes are aligned with the project's [architecture](#architecture-overview).
|
|
10
|
+
- **Write tests**: Ensure that your changes are covered by tests. We use [pytest](https://docs.pytest.org/en/stable/) for testing. If you add new features or fix bugs, please include tests to verify your changes.
|
|
11
|
+
- **Document your changes**: Update the documentation to reflect any new features or changes you make. This includes updating the README, Google-style docstrings, and the [Mkdocs](https://www.mkdocs.org)-based documentation.
|
|
12
|
+
- **Use clear commit messages**: When committing your changes, use clear and descriptive commit messages that explain the purpose of the changes.
|
|
13
|
+
- **Follow the pull request process**: When you're ready to submit your changes, follow the [pull request process](#pull-request-process) outlined below.
|
|
14
|
+
|
|
15
|
+
## Development Installation
|
|
16
|
+
To set up XspecT for development, first make sure you have [Python](https://www.python.org/downloads/) and [Node.js](https://nodejs.org/en/download/) installed. Please note that XspecT is currently not supported in Windows or Alpine Linux environments, unless you build [COBS](https://github.com/aromberg/cobs) yourself.
|
|
17
|
+
|
|
18
|
+
Get started by cloning the repository:
|
|
19
|
+
```bash
|
|
20
|
+
git clone https://github.com/BIONF/XspecT2.git
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
You then need to build the web application using Vite. Navigate to the `xspect-web` directory and run the build command, which will also watch for changes:
|
|
24
|
+
```bash
|
|
25
|
+
cd XspecT2/src/xspect/xspect-web
|
|
26
|
+
```
|
|
27
|
+
```bash
|
|
28
|
+
npx vite build --watch
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Finally, in a separate terminal, navigate to the root of the cloned repository and install the Python package in editable mode:
|
|
32
|
+
```bash
|
|
33
|
+
pip install -e .
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
By combining the two processes, you can develop both the frontend and backend simultaneously.
|
|
37
|
+
|
|
38
|
+
## Architecture Overview
|
|
39
|
+
XspecT consists of a Python component (`src/xspect`) and a web application built with [Vite](https://vitejs.dev/) (`src/xspect/xspect-web`). The Python component provides the core functionality, including the command-line interface (CLI) and the backend API, while the web application provides a user-friendly interface for interacting with XspecT. Furthermore, tests for the Python component reside in the `tests/` directory, while documentation is provided in the `docs/` directory.
|
|
40
|
+
|
|
41
|
+
### Python Component
|
|
42
|
+
|
|
43
|
+
The Python component of XspecT is structured as follows:
|
|
44
|
+
|
|
45
|
+
- `main.py`: The entry point for the command-line interface (CLI) and the backend API.
|
|
46
|
+
- `web.py`: The [FastAPI](https://fastapi.tiangolo.com/) application that serves the web interface and handles API requests.
|
|
47
|
+
|
|
48
|
+
The core functionality of XspecT is implemented using the following modules:
|
|
49
|
+
|
|
50
|
+
- `classify.py`: Contains methods to classify sequences based on previously trained XspecT models.
|
|
51
|
+
- `filter_sequences.py`: Contains methods to filter sequences based on classification results.
|
|
52
|
+
- `model_management.py`: Contains methods to manage XspecT models.
|
|
53
|
+
- `train.py`: Contains methods to train XspecT models based on user-provided data or data from the NCBI/PubMLST API.
|
|
54
|
+
- `download_models.py`: Contains methods to download pre-trained XspecT models.
|
|
55
|
+
|
|
56
|
+
In the background, these modules utilize model classes and a result class, which are defined in the `/models/` folder.
|
|
57
|
+
|
|
58
|
+
- `/models/probabilistic_filter_model.py`: Base class for probabilistic filter models, which uses COBS indices for classification and stores the model's metadata. Results from the classification are stored in a `ModelResult` class.
|
|
59
|
+
- `/models/probabilistic_filter_svm_model.py`: This class extends the base model class and implements a probabilistic filter model, in which classification scores are passed to a support vector machine (SVM) for a final prediction. This model is typically used for species-level classification.
|
|
60
|
+
- `/models/probabilistic_filter_mlst_model.py`: This class extends the base model class and implements multilocus sequence typing (MLST) by using multiple COBS indices.
|
|
61
|
+
- `/models/probabilistic_single_filter_model.py`: This class extends the base model class and implements a model that uses a single Bloom filter for classification. It is typically used for genus-level classification.
|
|
62
|
+
- `/models/result.py`: Contains the `ModelResult` class, which stores the results of a classification operation, including classification metadata, hits, and a prediction, if applicable.
|
|
63
|
+
|
|
64
|
+
Supplementary modules are documented in their respective files.
|
|
65
|
+
|
|
66
|
+
### Web Application
|
|
67
|
+
The web application (`src/xspect/xspect-web`) is built using Vite, [Axios](https://axios-http.com/), [Tailwind CSS](https://tailwindcss.com/), and [shadcn/ui](https://ui.shadcn.com/). It provides a user-friendly interface for interacting with XspecT and includes the following main components:
|
|
68
|
+
|
|
69
|
+
- `src/api.tsx`: Contains the API client for making requests to the backend FastAPI application.
|
|
70
|
+
- `src/App.tsx`: The main application component that renders the user interface. It uses React Router for navigation and includes the main layout as well as routing logic.
|
|
71
|
+
- `src/assets/`: Contains static assets such as images and icons.
|
|
72
|
+
- `src/components/`: Contains reusable components for the user interface, such as buttons, forms, and modals.
|
|
73
|
+
- `src/components/ui/`: Contains UI components from shadcn/ui, which are used to build the user interface.
|
|
74
|
+
- `src/types.tsx`: Contains TypeScript type definitions for the application, including types for API responses.
|
|
75
|
+
- `vite.config.ts`: The Vite configuration file that defines how the web application is built and served. Also includes a configuration for the API proxy to the FastAPI backend.
|
|
76
|
+
|
|
77
|
+
## Continuous Integration and Deployment
|
|
78
|
+
We use GitHub Actions to run checks on commits and pull requests. These checks include:
|
|
79
|
+
|
|
80
|
+
- **Code style and formatting**: Ensures that changes align with the project's code style. We use [Black](https://black.readthedocs.io/en/stable/) for Python code formatting.
|
|
81
|
+
- **Linting**: [Pylint](https://pylint.pycqa.org/en/latest/) is used for Python code linting. It checks for coding standards, potential errors, and code smells.
|
|
82
|
+
- **Tests**: Ensures that all tests pass. We use [pytest](https://docs.pytest.org/en/stable/) for testing.
|
|
83
|
+
|
|
84
|
+
Additionally, GitHub Actions is used for deployment:
|
|
85
|
+
|
|
86
|
+
- **Documentation**: The Mkdocs-based documentation is built and deployed to GitHub Pages on changes to the `main` branch. You can view the documentation at [https://bionf.github.io/XspecT2/](https://bionf.github.io/XspecT2/).
|
|
87
|
+
- **Python package**: The Python package is built and uploaded to PyPI when a new release is created. This allows users to easily install the latest version of XspecT using `pip install xspect`. Pre-releases are uploaded to TestPyPI and can be installed using `pip install --index-url https://test.pypi.org/simple/ xspect`.
|
|
88
|
+
|
|
89
|
+
## Pull Request Process
|
|
90
|
+
Once you have made your changes and tested them, you can submit a pull request. Please follow these steps:
|
|
91
|
+
|
|
92
|
+
1. Ensure your code is up to date with the `dev` branch
|
|
93
|
+
2. Create a pull request with a clear description of your changes to the `dev` branch
|
|
94
|
+
3. Address any feedback from reviewers
|
|
95
|
+
4. Once approved, your changes will be merged
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Understanding XspecT
|
|
2
|
+
|
|
3
|
+
## What is XspecT?
|
|
4
|
+
|
|
5
|
+
XspecT is a tool designed to monitor and characterize pathogens using exact pattern matching of kmers. It allows users to filter for pathogen sequences in metagenomic datasets, classify these sequences on a species level, and perform strain-level typing.
|
|
6
|
+
|
|
7
|
+
## Key Features
|
|
8
|
+
- **Genus-Level Classification**: Classify sequences at the genus level, enabling researchers to quickly identify the presence of specific microbial groups.
|
|
9
|
+
- **Species-Level Classification**: Provides detailed classification of sequences at the species level, enhancing the understanding of microbial diversity.
|
|
10
|
+
- **Multi-Locus Strain Typing**: Offers the ability to type sequences at the strain level, which is crucial for understanding variations within species.
|
|
11
|
+
- **Filtering**: Classification results can be used to filter sequences, enabling analysis of metagenomic samples.
|
|
12
|
+
- **Model Management**: XspecT models can be easily downloaded or trained from scratch using the command line interface. Training is possible both from local data and from the NCBI Datasets and PubMLST APIs.
|
|
13
|
+
- **User-friendly Interface**: In addition to the command line interface (CLI), a React-based web interface is available for easy interaction and visualization of results.
|
|
14
|
+
- **Works with Large Datasets**: Entire folders of input data can be passed to the tool, allowing for efficient processing of large datasets.
|
|
15
|
+
|
|
16
|
+
## How XspecT Works
|
|
17
|
+
At its core, XspecT uses exact pattern matching of kmers to identify and classify sequences. The tool leverages indices of known pathogen sequences stored in XspecT models to match against input data. This process involves:
|
|
18
|
+
|
|
19
|
+
1. **Kmer Extraction**: The input sequences are processed to extract kmers, which are short sequences of a fixed length.
|
|
20
|
+
2. **Pattern Matching**: The extracted kmers are matched against an index of known sequences using exact matching algorithms. The number of matches is recorded, and stored as hits.
|
|
21
|
+
3. **Classification**: Based on hits, scores are calculated as the fraction of kmers that match known sequences. These scores are then used to classify the sequences at different taxonomic levels.
|
|
22
|
+
|
|
23
|
+
### COBS Index
|
|
24
|
+
In order to store kmers in a space-efficient manner, XspecT uses a COBS ("Compact Bit-Sliced Signature Index") classic index. This index uses a probabilistic data structure to store kmers, allowing for efficient storage and retrieval. The COBS index is designed to handle large datasets while maintaining fast query performance. More information about the COBS index can be found in the [COBS research paper](https://arxiv.org/abs/1905.09624).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -55,7 +55,7 @@ Requires-Dist: pytest-retry; extra == "test"
|
|
|
55
55
|
Requires-Dist: httpx; extra == "test"
|
|
56
56
|
Dynamic: license-file
|
|
57
57
|
|
|
58
|
-
# XspecT
|
|
58
|
+
# XspecT
|
|
59
59
|
<!-- start intro -->
|
|
60
60
|

|
|
61
61
|
[](https://github.com/pylint-dev/pylint)
|
|
@@ -63,7 +63,7 @@ Dynamic: license-file
|
|
|
63
63
|
|
|
64
64
|
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
|
|
65
65
|
|
|
66
|
-
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a
|
|
66
|
+
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a kmer index. Probabilistic data structures ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
|
|
67
67
|
|
|
68
68
|
The tool is available as a web-based application and as a command line interface.
|
|
69
69
|
|
|
@@ -91,16 +91,22 @@ xspect models train ncbi
|
|
|
91
91
|
```
|
|
92
92
|
|
|
93
93
|
### How to run the web app
|
|
94
|
-
To run the web app,
|
|
94
|
+
To run the web app, simply execute:
|
|
95
95
|
```
|
|
96
96
|
xspect web
|
|
97
97
|
```
|
|
98
98
|
|
|
99
|
+
This will start a local web server. You can access the web app by navigating to `http://localhost:8000` in your web browser.
|
|
100
|
+
|
|
99
101
|
### How to use the XspecT command line interface
|
|
100
|
-
|
|
102
|
+
To use the XspecT command line interface, execute `xspect` with the desired subcommand and parameters.
|
|
103
|
+
|
|
104
|
+
**Example**:
|
|
101
105
|
```
|
|
102
106
|
xspect classify species
|
|
103
107
|
```
|
|
108
|
+
|
|
109
|
+
If you do not provide the required parameters, the command line interface will prompt you for them.
|
|
104
110
|
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
105
111
|
```
|
|
106
112
|
xspect --help
|
|
@@ -54,8 +54,8 @@ src/xspect/xspect-web/tsconfig.node.json
|
|
|
54
54
|
src/xspect/xspect-web/vite.config.ts
|
|
55
55
|
src/xspect/xspect-web/dist/index.html
|
|
56
56
|
src/xspect/xspect-web/dist/vite.svg
|
|
57
|
-
src/xspect/xspect-web/dist/assets/index-
|
|
58
|
-
src/xspect/xspect-web/dist/assets/index-
|
|
57
|
+
src/xspect/xspect-web/dist/assets/index-Ceo58xui.css
|
|
58
|
+
src/xspect/xspect-web/dist/assets/index-Dt_UlbgE.js
|
|
59
59
|
src/xspect/xspect-web/public/vite.svg
|
|
60
60
|
src/xspect/xspect-web/src/App.tsx
|
|
61
61
|
src/xspect/xspect-web/src/api.tsx
|
|
@@ -72,6 +72,7 @@ src/xspect/xspect-web/src/components/dropdown-checkboxes.tsx
|
|
|
72
72
|
src/xspect/xspect-web/src/components/dropdown-slider.tsx
|
|
73
73
|
src/xspect/xspect-web/src/components/filter-form.tsx
|
|
74
74
|
src/xspect/xspect-web/src/components/filter.tsx
|
|
75
|
+
src/xspect/xspect-web/src/components/filtering-result.tsx
|
|
75
76
|
src/xspect/xspect-web/src/components/header.tsx
|
|
76
77
|
src/xspect/xspect-web/src/components/landing.tsx
|
|
77
78
|
src/xspect/xspect-web/src/components/models-details.tsx
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from xspect.mlst_feature.mlst_helper import pick_scheme_from_models_dir
|
|
3
|
+
import xspect.model_management as mm
|
|
4
|
+
from xspect.models.probabilistic_filter_mlst_model import (
|
|
5
|
+
ProbabilisticFilterMlstSchemeModel,
|
|
6
|
+
)
|
|
7
|
+
from xspect.file_io import prepare_input_output_paths
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def classify_genus(
|
|
11
|
+
model_genus: str, input_path: Path, output_path: Path, step: int = 1
|
|
12
|
+
):
|
|
13
|
+
"""
|
|
14
|
+
Classify the genus of sequences.
|
|
15
|
+
|
|
16
|
+
This function classifies input files using the genus model.
|
|
17
|
+
The input path can be a file or directory
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
model_genus (str): The genus model slug.
|
|
21
|
+
input_path (Path): The path to the input file/directory containing sequences.
|
|
22
|
+
output_path (Path): The path to the output file where results will be saved.
|
|
23
|
+
step (int): The amount of kmers to be skipped.
|
|
24
|
+
"""
|
|
25
|
+
model = mm.get_genus_model(model_genus)
|
|
26
|
+
input_paths, get_output_path = prepare_input_output_paths(input_path)
|
|
27
|
+
|
|
28
|
+
for idx, current_path in enumerate(input_paths):
|
|
29
|
+
result = model.predict(current_path, step=step)
|
|
30
|
+
result.input_source = current_path.name
|
|
31
|
+
cls_path = get_output_path(idx, output_path)
|
|
32
|
+
result.save(cls_path)
|
|
33
|
+
print(f"Saved result as {cls_path.name}")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def classify_species(
|
|
37
|
+
model_genus: str, input_path: Path, output_path: Path, step: int = 1
|
|
38
|
+
):
|
|
39
|
+
"""
|
|
40
|
+
Classify the species of sequences.
|
|
41
|
+
|
|
42
|
+
This function classifies input files using the species model.
|
|
43
|
+
The input path can be a file or directory
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
model_genus (str): The genus model slug.
|
|
47
|
+
input_path (Path): The path to the input file/directory containing sequences.
|
|
48
|
+
output_path (Path): The path to the output file where results will be saved.
|
|
49
|
+
step (int): The amount of kmers to be skipped.
|
|
50
|
+
"""
|
|
51
|
+
model = mm.get_species_model(model_genus)
|
|
52
|
+
input_paths, get_output_path = prepare_input_output_paths(input_path)
|
|
53
|
+
|
|
54
|
+
for idx, current_path in enumerate(input_paths):
|
|
55
|
+
result = model.predict(current_path, step=step)
|
|
56
|
+
result.input_source = current_path.name
|
|
57
|
+
cls_path = get_output_path(idx, output_path)
|
|
58
|
+
result.save(cls_path)
|
|
59
|
+
print(f"Saved result as {cls_path.name}")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def classify_mlst(input_path: Path, output_path: Path, limit: bool):
    """
    Run MLST strain-type classification on one or more sequence inputs.

    A scheme is picked from the models directory, the matching MLST scheme
    model is loaded from it, and the model is applied (with a fixed step of 1)
    to every path produced for ``input_path``. Each result is tagged with the
    name of its input file, saved, and the saved file name is printed.

    Args:
        input_path (Path): The path to the input file/directory containing sequences.
        output_path (Path): The output path handed to the output-path resolver
            for every processed input.
        limit (bool): A limit for the highest allele_id results that are shown;
            forwarded unchanged to the model's ``predict`` call.
    """
    selected_scheme = pick_scheme_from_models_dir()
    mlst_model = ProbabilisticFilterMlstSchemeModel.load(selected_scheme)
    sequence_files, resolve_output = prepare_input_output_paths(input_path)

    for position, sequence_file in enumerate(sequence_files):
        prediction = mlst_model.predict(
            selected_scheme, sequence_file, step=1, limit=limit
        )
        # Remember which input file this result belongs to.
        prediction.input_source = sequence_file.name
        destination = resolve_output(position, output_path)
        prediction.save(destination)
        print(f"Saved result as {destination.name}")
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""This module contains definitions for the XspecT package."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from os import getcwd
|
|
5
|
+
|
|
6
|
+
# File-name endings (written without a leading dot) grouped under the FASTA
# name. NOTE(review): how these are matched against paths is decided by code
# outside this module — confirm case handling at the call sites.
fasta_endings = ["fasta", "fna", "fa", "ffn", "frn"]
|
|
7
|
+
# File-name endings (written without a leading dot) grouped under the FASTQ
# name. NOTE(review): matching logic lives outside this module — confirm at
# the call sites.
fastq_endings = ["fastq", "fq"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_xspect_root_path() -> Path:
    """
    Locate (or create) the XspecT data directory.

    Lookup order:
      1. ``xspect-data`` inside the user's home directory, if it exists.
      2. ``xspect-data`` inside the current working directory, if it exists.
      3. Otherwise the home-directory variant is created and returned.

    Returns:
        Path: The path to the XspecT data directory.
    """
    data_dir_name = "xspect-data"
    in_home = Path.home() / data_dir_name

    if not in_home.exists():
        # The working directory is only consulted when the home-based
        # directory is missing.
        in_cwd = Path(getcwd()) / data_dir_name
        if in_cwd.exists():
            return in_cwd
        in_home.mkdir(exist_ok=True, parents=True)

    return in_home
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_xspect_model_path() -> Path:
    """
    Return the XspecT models directory, creating it when missing.

    The directory is the ``models`` folder inside the XspecT data directory
    reported by ``get_xspect_root_path``.

    Returns:
        Path: The path to the XspecT models directory.
    """
    models_dir = get_xspect_root_path().joinpath("models")
    models_dir.mkdir(parents=True, exist_ok=True)
    return models_dir
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_xspect_upload_path() -> Path:
    """
    Return the XspecT uploads directory, creating it when missing.

    The directory is the ``uploads`` folder inside the XspecT data directory
    reported by ``get_xspect_root_path``.

    Returns:
        Path: The path to the XspecT uploads directory.
    """
    uploads_dir = get_xspect_root_path().joinpath("uploads")
    uploads_dir.mkdir(parents=True, exist_ok=True)
    return uploads_dir
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_xspect_runs_path() -> Path:
    """
    Return the XspecT runs directory, creating it when missing.

    The directory is the ``runs`` folder inside the XspecT data directory
    reported by ``get_xspect_root_path``.

    Returns:
        Path: The path to the XspecT runs directory.
    """
    runs_dir = get_xspect_root_path().joinpath("runs")
    runs_dir.mkdir(parents=True, exist_ok=True)
    return runs_dir
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_xspect_mlst_path() -> Path:
    """
    Return the XspecT MLST directory, creating it when missing.

    The directory is the ``mlst`` folder inside the XspecT data directory
    reported by ``get_xspect_root_path``.

    Returns:
        Path: The path to the XspecT MLST directory.
    """
    mlst_dir = get_xspect_root_path().joinpath("mlst")
    mlst_dir.mkdir(parents=True, exist_ok=True)
    return mlst_dir
|
|
@@ -8,8 +8,16 @@ import requests
|
|
|
8
8
|
from xspect.definitions import get_xspect_model_path
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def download_test_models(url):
|
|
12
|
-
"""
|
|
11
|
+
def download_test_models(url: str) -> None:
|
|
12
|
+
"""
|
|
13
|
+
Download models from the specified URL.
|
|
14
|
+
|
|
15
|
+
This function downloads a zip file from the given URL, extracts its contents,
|
|
16
|
+
and copies the extracted files to the XspecT model directory.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
url (str): The URL from which to download the models.
|
|
20
|
+
"""
|
|
13
21
|
with TemporaryDirectory() as tmp_dir:
|
|
14
22
|
tmp_dir = Path(tmp_dir)
|
|
15
23
|
download_path = tmp_dir / "models.zip"
|