XspecT 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic. Click here for more details.
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/METADATA +23 -29
- XspecT-0.2.0.dist-info/RECORD +30 -0
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/WHEEL +1 -1
- xspect/definitions.py +42 -0
- xspect/download_filters.py +11 -26
- xspect/fastapi.py +101 -0
- xspect/file_io.py +34 -103
- xspect/main.py +70 -66
- xspect/model_management.py +88 -0
- xspect/models/__init__.py +0 -0
- xspect/models/probabilistic_filter_model.py +277 -0
- xspect/models/probabilistic_filter_svm_model.py +169 -0
- xspect/models/probabilistic_single_filter_model.py +109 -0
- xspect/models/result.py +148 -0
- xspect/pipeline.py +201 -0
- xspect/run.py +38 -0
- xspect/train.py +304 -0
- xspect/train_filter/create_svm.py +6 -183
- xspect/train_filter/extract_and_concatenate.py +117 -121
- xspect/train_filter/html_scrap.py +16 -28
- xspect/train_filter/ncbi_api/download_assemblies.py +7 -8
- xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +9 -17
- xspect/train_filter/ncbi_api/ncbi_children_tree.py +3 -2
- xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +7 -5
- XspecT-0.1.3.dist-info/RECORD +0 -49
- xspect/BF_v2.py +0 -637
- xspect/Bootstrap.py +0 -29
- xspect/Classifier.py +0 -142
- xspect/OXA_Table.py +0 -53
- xspect/WebApp.py +0 -724
- xspect/XspecT_mini.py +0 -1363
- xspect/XspecT_trainer.py +0 -611
- xspect/map_kmers.py +0 -155
- xspect/search_filter.py +0 -504
- xspect/static/How-To.png +0 -0
- xspect/static/Logo.png +0 -0
- xspect/static/Logo2.png +0 -0
- xspect/static/Workflow_AspecT.png +0 -0
- xspect/static/Workflow_ClAssT.png +0 -0
- xspect/static/js.js +0 -615
- xspect/static/main.css +0 -280
- xspect/templates/400.html +0 -64
- xspect/templates/401.html +0 -62
- xspect/templates/404.html +0 -62
- xspect/templates/500.html +0 -62
- xspect/templates/about.html +0 -544
- xspect/templates/home.html +0 -51
- xspect/templates/layoutabout.html +0 -87
- xspect/templates/layouthome.html +0 -63
- xspect/templates/layoutspecies.html +0 -468
- xspect/templates/species.html +0 -33
- xspect/train_filter/README_XspecT_Erweiterung.md +0 -119
- xspect/train_filter/get_paths.py +0 -35
- xspect/train_filter/interface_XspecT.py +0 -204
- xspect/train_filter/k_mer_count.py +0 -162
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/LICENSE +0 -0
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/entry_points.txt +0 -0
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -32,25 +32,19 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
32
32
|
Requires-Python: >=3.10
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
34
34
|
License-File: LICENSE
|
|
35
|
-
Requires-Dist: Flask
|
|
36
|
-
Requires-Dist: Flask-WTF
|
|
37
|
-
Requires-Dist: WTForms
|
|
38
|
-
Requires-Dist: Werkzeug
|
|
39
35
|
Requires-Dist: biopython
|
|
40
|
-
Requires-Dist: bitarray
|
|
41
|
-
Requires-Dist: mmh3
|
|
42
|
-
Requires-Dist: numpy
|
|
43
|
-
Requires-Dist: pandas
|
|
44
36
|
Requires-Dist: requests
|
|
45
37
|
Requires-Dist: scikit-learn
|
|
46
|
-
Requires-Dist: Psutil
|
|
47
|
-
Requires-Dist: Matplotlib
|
|
48
|
-
Requires-Dist: Pympler
|
|
49
|
-
Requires-Dist: H5py
|
|
50
38
|
Requires-Dist: Bio
|
|
51
|
-
Requires-Dist: wheel
|
|
52
39
|
Requires-Dist: loguru
|
|
53
40
|
Requires-Dist: click
|
|
41
|
+
Requires-Dist: python-slugify
|
|
42
|
+
Requires-Dist: cobs-reloaded
|
|
43
|
+
Requires-Dist: rbloom
|
|
44
|
+
Requires-Dist: xxhash
|
|
45
|
+
Requires-Dist: fastapi
|
|
46
|
+
Requires-Dist: uvicorn
|
|
47
|
+
Requires-Dist: python-multipart
|
|
54
48
|
Provides-Extra: docs
|
|
55
49
|
Requires-Dist: sphinx ; extra == 'docs'
|
|
56
50
|
Requires-Dist: furo ; extra == 'docs'
|
|
@@ -62,9 +56,13 @@ Requires-Dist: pytest ; extra == 'test'
|
|
|
62
56
|
Requires-Dist: pytest-cov ; extra == 'test'
|
|
63
57
|
|
|
64
58
|
# XspecT - Acinetobacter Species Assignment Tool
|
|
65
|
-
|
|
59
|
+

|
|
60
|
+
[](https://github.com/pylint-dev/pylint)
|
|
61
|
+
[](https://github.com/psf/black)
|
|
62
|
+
|
|
63
|
+
<img src="/src/docs/img/logo.png" height="50%" width="50%">
|
|
66
64
|
<!-- start intro -->
|
|
67
|
-
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or sub-type level using [Bloom Filters]
|
|
65
|
+
XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or sub-type level using [Bloom Filters] and a [Support Vector Machine]. It also identifies existing [blaOxa-genes] and provides a list of relevant research papers for further information.
|
|
68
66
|
<br/><br/>
|
|
69
67
|
|
|
70
68
|
XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction the results are classified using a Support Vector Machine.
|
|
@@ -74,6 +72,10 @@ Local extensions of the reference database are supported.
|
|
|
74
72
|
<br/>
|
|
75
73
|
|
|
76
74
|
The tool is available as a web-based application and a smaller command line interface.
|
|
75
|
+
|
|
76
|
+
[Bloom Filters]: https://en.wikipedia.org/wiki/Bloom_filter
|
|
77
|
+
[Support Vector Machine]: https://en.wikipedia.org/wiki/Support-vector_machine
|
|
78
|
+
[blaOxa-genes]: https://en.wikipedia.org/wiki/Beta-lactamase#OXA_beta-lactamases_(class_D)
|
|
77
79
|
<!-- end intro -->
|
|
78
80
|
|
|
79
81
|
<!-- start quickstart -->
|
|
@@ -82,11 +84,7 @@ To install Xspect, please download the lastest 64 bit Python version and install
|
|
|
82
84
|
```
|
|
83
85
|
pip install xspect
|
|
84
86
|
```
|
|
85
|
-
|
|
86
|
-
```
|
|
87
|
-
conda install -c bioconda jellyfish
|
|
88
|
-
```
|
|
89
|
-
On Apple Silicon, it is possible that this command installs an incorrect Jellyfish package. Please refer to the official [Jellyfish project](https://github.com/gmarcais/Jellyfish) for installation guidance.
|
|
87
|
+
Please note that Apple Silicon is currently not supported.
|
|
90
88
|
|
|
91
89
|
## Usage
|
|
92
90
|
### Get the Bloomfilters
|
|
@@ -100,9 +98,9 @@ xspect train you-ncbi-genus-name
|
|
|
100
98
|
```
|
|
101
99
|
|
|
102
100
|
### How to run the web app
|
|
103
|
-
|
|
101
|
+
To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
|
|
104
102
|
```
|
|
105
|
-
xspect
|
|
103
|
+
xspect api
|
|
106
104
|
```
|
|
107
105
|
|
|
108
106
|
### How to use the XspecT command line interface
|
|
@@ -110,13 +108,9 @@ Run xspect with the configuration you want to run it with as arguments.
|
|
|
110
108
|
```
|
|
111
109
|
xspect classify your-genus path/to/your/input-set
|
|
112
110
|
```
|
|
113
|
-
For further instructions on how to use the command line interface, execute:
|
|
111
|
+
For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
|
|
114
112
|
```
|
|
115
113
|
xspect --help
|
|
116
114
|
```
|
|
115
|
+
[documentation]: https://bionf.github.io/XspecT2/cli.html
|
|
117
116
|
<!-- end quickstart -->
|
|
118
|
-
|
|
119
|
-
## Input Data
|
|
120
|
-
XspecT is able to use either raw sequence-reads (FASTQ-format .fq/.fastq) or already assembled genomes (FASTA-format .fasta/.fna). Using sequence-reads saves up the assembly process but high-quality reads with a low error-rate are needed (e.g. Illumina-reads).
|
|
121
|
-
|
|
122
|
-
The amount of reads that will be used has to be set by the user when using sequence-reads. The minimum amount is 5000 reads for species classification and 500 reads for sub-type classification. The maximum number of reads is limited by the browser and is usually around ~8 million reads. Using more reads will lead to a increased runtime (xsec./1mio reads).
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
xspect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
xspect/definitions.py,sha256=gg6NvT8ypNzlnJvMMo3nHsyh8DHFFu41lOfnILkRDpE,1215
|
|
3
|
+
xspect/download_filters.py,sha256=ByE7Oggx-AyJ02Wirk_wcJHNdRDrJMfjwhmUe5tgWbE,741
|
|
4
|
+
xspect/fastapi.py,sha256=UuUr3eQUL0tCcB2d_ZKMToqreNLSNRKpCKK3-lwAzVo,3208
|
|
5
|
+
xspect/file_io.py,sha256=zKhl6Fd9KZAYiD8YgIyje5TbDYk5lxMp1WUrNkGSBo8,2779
|
|
6
|
+
xspect/main.py,sha256=rFoHKBC9UANlZh3TccZAJbOZ6023BnQaGEoPjjJjW0A,3572
|
|
7
|
+
xspect/model_management.py,sha256=w0aqjLUoixCokyKTYrcN1vih5IoLYLJG9p8aeYdVc8Y,3560
|
|
8
|
+
xspect/pipeline.py,sha256=h7duhVZ-hupwO_KQPstzFo8KMfMI2yleb9HmtTiMjic,7219
|
|
9
|
+
xspect/run.py,sha256=OJ7pCFqva3AhIYklKjVnqWGooVRO7S3b56kIAy-xabY,1189
|
|
10
|
+
xspect/train.py,sha256=khC1lldqfr4NvzLUiSJjSlh7DBG1ePielvQMiB29Hl8,10399
|
|
11
|
+
xspect/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
xspect/models/probabilistic_filter_model.py,sha256=ImyNRzR7jf2CBPGI65ItG0_eYmrQjo9soQYlsM0r-P0,9829
|
|
13
|
+
xspect/models/probabilistic_filter_svm_model.py,sha256=9Q4SBAzgbqATpS2E3IoardPpBwqkyrYSnrMwh0zwSag,5420
|
|
14
|
+
xspect/models/probabilistic_single_filter_model.py,sha256=nDAd_-_Ci2eH0KOJtf4wA-w63FMq9rGSR1LGiIA-gdw,3884
|
|
15
|
+
xspect/models/result.py,sha256=vHUEFXvbFyB8WmasXp99IrztjwaxH1f9QMFiRUPe40Q,4824
|
|
16
|
+
xspect/train_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
+
xspect/train_filter/create_svm.py,sha256=w6gq40yHINVfNzLhJfYFykUaNCwpU9AEDcbkUfis3DY,1504
|
|
18
|
+
xspect/train_filter/extract_and_concatenate.py,sha256=lLrczGgfZi2vAGqxq8fcEmJi5pvqyK33JkB_ZoCNYG8,4840
|
|
19
|
+
xspect/train_filter/html_scrap.py,sha256=76VV_ZbvD2I3IxRb62SiQwRPu2tr4fwn1HkfJQYaosM,3809
|
|
20
|
+
xspect/train_filter/ncbi_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
xspect/train_filter/ncbi_api/download_assemblies.py,sha256=MB_mxSjCTL05DqIt1WQem8AGU3PjtJnzPndeI9J-AOI,1285
|
|
22
|
+
xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py,sha256=puzDIws-yyBAEHwSAIYUM7g8FpLFmvOKh5xH1EsY8ZE,3830
|
|
23
|
+
xspect/train_filter/ncbi_api/ncbi_children_tree.py,sha256=_8puOsnsKp5lsMV2gZY1ijkfD_BZKG9eXZCX09qph5E,1819
|
|
24
|
+
xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py,sha256=O6JDXC4E6AYaf7NPnb34eSJyZhMB8r--bjoVF_ZsEdA,1868
|
|
25
|
+
XspecT-0.2.0.dist-info/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
|
|
26
|
+
XspecT-0.2.0.dist-info/METADATA,sha256=efT3SkWV55firuZJh1gHCN7061Fxda7teuFLeZHvJQ0,4826
|
|
27
|
+
XspecT-0.2.0.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
|
|
28
|
+
XspecT-0.2.0.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
|
|
29
|
+
XspecT-0.2.0.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
|
|
30
|
+
XspecT-0.2.0.dist-info/RECORD,,
|
xspect/definitions.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""This module contains definitions for the XspecT package."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from os import getcwd
|
|
5
|
+
|
|
6
|
+
fasta_endings = ["fasta", "fna", "fa", "ffn", "frn"]
|
|
7
|
+
fastq_endings = ["fastq", "fq"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_xspect_root_path():
|
|
11
|
+
"""Return the root path for XspecT data."""
|
|
12
|
+
root_path = Path(getcwd()) / "xspect-data"
|
|
13
|
+
root_path.mkdir(exist_ok=True, parents=True)
|
|
14
|
+
return root_path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_xspect_model_path():
|
|
18
|
+
"""Return the path to the XspecT models."""
|
|
19
|
+
model_path = get_xspect_root_path() / "models"
|
|
20
|
+
model_path.mkdir(exist_ok=True, parents=True)
|
|
21
|
+
return model_path
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_xspect_tmp_path():
|
|
25
|
+
"""Return the path to the XspecT temporary files."""
|
|
26
|
+
tmp_path = get_xspect_root_path() / "tmp"
|
|
27
|
+
tmp_path.mkdir(exist_ok=True, parents=True)
|
|
28
|
+
return tmp_path
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_xspect_upload_path():
|
|
32
|
+
"""Return the path to the XspecT upload directory."""
|
|
33
|
+
upload_path = get_xspect_root_path() / "uploads"
|
|
34
|
+
upload_path.mkdir(exist_ok=True, parents=True)
|
|
35
|
+
return upload_path
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_xspect_runs_path():
|
|
39
|
+
"""Return the path to the XspecT runs directory."""
|
|
40
|
+
runs_path = get_xspect_root_path() / "runs"
|
|
41
|
+
runs_path.mkdir(exist_ok=True, parents=True)
|
|
42
|
+
return runs_path
|
xspect/download_filters.py
CHANGED
|
@@ -4,45 +4,30 @@ import os
|
|
|
4
4
|
import shutil
|
|
5
5
|
import requests
|
|
6
6
|
|
|
7
|
+
from xspect.definitions import get_xspect_model_path, get_xspect_tmp_path
|
|
8
|
+
|
|
7
9
|
|
|
8
10
|
def download_test_filters(url):
|
|
9
11
|
"""Download filters."""
|
|
10
12
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
if not os.path.exists("Training_data"):
|
|
15
|
-
os.makedirs("Training_data")
|
|
13
|
+
download_path = get_xspect_tmp_path() / "models.zip"
|
|
14
|
+
extract_path = get_xspect_tmp_path() / "extracted_models"
|
|
16
15
|
|
|
17
16
|
r = requests.get(url, allow_redirects=True, timeout=10)
|
|
18
|
-
with open(
|
|
17
|
+
with open(download_path, "wb") as f:
|
|
19
18
|
f.write(r.content)
|
|
20
19
|
|
|
21
20
|
shutil.unpack_archive(
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
download_path,
|
|
22
|
+
extract_path,
|
|
24
23
|
"zip",
|
|
25
24
|
)
|
|
26
25
|
|
|
27
26
|
shutil.copytree(
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
extract_path,
|
|
28
|
+
get_xspect_model_path(),
|
|
30
29
|
dirs_exist_ok=True,
|
|
31
30
|
)
|
|
32
31
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
shutil.copytree(
|
|
36
|
-
"filter/temp/filters",
|
|
37
|
-
"filter",
|
|
38
|
-
dirs_exist_ok=True,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
shutil.rmtree("filter/temp")
|
|
42
|
-
|
|
43
|
-
os.remove("filter/filters.zip")
|
|
44
|
-
|
|
45
|
-
saved_options = ["Salmonella"]
|
|
46
|
-
with open("saved_options.txt", "w") as f:
|
|
47
|
-
for item in saved_options:
|
|
48
|
-
f.write("%s\n" % item)
|
|
32
|
+
os.remove(download_path)
|
|
33
|
+
shutil.rmtree(extract_path)
|
xspect/fastapi.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""FastAPI application for XspecT."""
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from shutil import copyfileobj
|
|
6
|
+
from fastapi import FastAPI, UploadFile, BackgroundTasks
|
|
7
|
+
from xspect.definitions import get_xspect_runs_path, get_xspect_upload_path
|
|
8
|
+
from xspect.download_filters import download_test_filters
|
|
9
|
+
import xspect.model_management as mm
|
|
10
|
+
from xspect.models.result import StepType
|
|
11
|
+
from xspect.pipeline import ModelExecution, Pipeline, PipelineStep
|
|
12
|
+
from xspect.train import train_ncbi
|
|
13
|
+
|
|
14
|
+
app = FastAPI()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@app.get("/download-filters")
|
|
18
|
+
def download_filters():
|
|
19
|
+
"""Download filters."""
|
|
20
|
+
download_test_filters("https://xspect2.s3.eu-central-1.amazonaws.com/models.zip")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@app.get("/classify")
|
|
24
|
+
def classify(genus: str, file: str, meta: bool = False, step: int = 500):
|
|
25
|
+
"""Classify uploaded sample."""
|
|
26
|
+
|
|
27
|
+
path = get_xspect_upload_path() / file
|
|
28
|
+
|
|
29
|
+
pipeline = Pipeline(genus + " classification", "Test Author", "test@example.com")
|
|
30
|
+
species_execution = ModelExecution(genus + "-species", sparse_sampling_step=step)
|
|
31
|
+
if meta:
|
|
32
|
+
species_filtering_step = PipelineStep(
|
|
33
|
+
StepType.FILTERING, genus, 0.7, species_execution
|
|
34
|
+
)
|
|
35
|
+
genus_execution = ModelExecution(genus + "-genus", sparse_sampling_step=step)
|
|
36
|
+
genus_execution.add_pipeline_step(species_filtering_step)
|
|
37
|
+
pipeline.add_pipeline_step(genus_execution)
|
|
38
|
+
else:
|
|
39
|
+
pipeline.add_pipeline_step(species_execution)
|
|
40
|
+
|
|
41
|
+
run = pipeline.run(Path(path))
|
|
42
|
+
time_str = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
|
|
43
|
+
save_path = get_xspect_runs_path() / f"run_{time_str}.json"
|
|
44
|
+
run.save(save_path)
|
|
45
|
+
|
|
46
|
+
return run.to_dict()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@app.post("/train")
|
|
50
|
+
def train(genus: str, background_tasks: BackgroundTasks, svm_steps: int = 1):
|
|
51
|
+
"""Train NCBI model."""
|
|
52
|
+
background_tasks.add_task(train_ncbi, genus, svm_steps)
|
|
53
|
+
|
|
54
|
+
return {"message": "Training started."}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@app.get("/list-models")
|
|
58
|
+
def list_models():
|
|
59
|
+
"""List available models."""
|
|
60
|
+
return mm.get_models()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@app.get("/model-metadata")
|
|
64
|
+
def get_model_metadata(model_slug: str):
|
|
65
|
+
"""Get metadata of a model."""
|
|
66
|
+
return mm.get_model_metadata(model_slug)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@app.post("/model-metadata")
|
|
70
|
+
def post_model_metadata(model_slug: str, author: str, author_email: str):
|
|
71
|
+
"""Update metadata of a model."""
|
|
72
|
+
try:
|
|
73
|
+
mm.update_model_metadata(model_slug, author, author_email)
|
|
74
|
+
except ValueError as e:
|
|
75
|
+
return {"error": str(e)}
|
|
76
|
+
return {"message": "Metadata updated."}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.post("/model-display-name")
|
|
80
|
+
def post_model_display_name(model_slug: str, filter_id: str, display_name: str):
|
|
81
|
+
"""Update display name of a filter in a model."""
|
|
82
|
+
try:
|
|
83
|
+
mm.update_model_display_name(model_slug, filter_id, display_name)
|
|
84
|
+
except ValueError as e:
|
|
85
|
+
return {"error": str(e)}
|
|
86
|
+
return {"message": "Display name updated."}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@app.post("/upload-file")
|
|
90
|
+
def upload_file(file: UploadFile):
|
|
91
|
+
"""Upload file to the server."""
|
|
92
|
+
upload_path = get_xspect_upload_path() / file.filename
|
|
93
|
+
|
|
94
|
+
if not upload_path.exists():
|
|
95
|
+
try:
|
|
96
|
+
with upload_path.open("wb") as buffer:
|
|
97
|
+
copyfileobj(file.file, buffer)
|
|
98
|
+
finally:
|
|
99
|
+
file.file.close()
|
|
100
|
+
|
|
101
|
+
return {"filename": file.filename}
|
xspect/file_io.py
CHANGED
|
@@ -2,107 +2,11 @@
|
|
|
2
2
|
File IO module.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from linecache import getline
|
|
6
5
|
import os
|
|
7
6
|
from pathlib import Path
|
|
8
7
|
import zipfile
|
|
9
|
-
|
|
10
|
-
from
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def check_folder_structure():
|
|
14
|
-
"""Checks the folder structure and creates new folders if needed."""
|
|
15
|
-
# Create list of all folder paths.
|
|
16
|
-
root_path = Path(os.getcwd())
|
|
17
|
-
filter_path = root_path / "filter"
|
|
18
|
-
meta_path = root_path / "genus_metadata"
|
|
19
|
-
filter_folder_names = [
|
|
20
|
-
"array_sizes",
|
|
21
|
-
"Metagenomes",
|
|
22
|
-
"species_names",
|
|
23
|
-
"translation_dicts",
|
|
24
|
-
]
|
|
25
|
-
folder_paths = [filter_path, meta_path]
|
|
26
|
-
for filter_folder_name in filter_folder_names:
|
|
27
|
-
filter_folder_path = filter_path / filter_folder_name
|
|
28
|
-
folder_paths.append(filter_folder_path)
|
|
29
|
-
|
|
30
|
-
# Check if folders exist. If not create them.
|
|
31
|
-
for folder_path in folder_paths:
|
|
32
|
-
if not os.path.isdir(folder_path):
|
|
33
|
-
os.mkdir(folder_path)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def delete_non_fasta(files):
|
|
37
|
-
"""Delete all non fasta files from the list and return the list without those file names.
|
|
38
|
-
|
|
39
|
-
:param files: List of file names.
|
|
40
|
-
:type files: list[str]
|
|
41
|
-
:return: List with only fasta files.
|
|
42
|
-
"""
|
|
43
|
-
# All possible fasta file endings.
|
|
44
|
-
fasta_endings = ["fasta", "fna", "fa", "ffn", "frn"]
|
|
45
|
-
|
|
46
|
-
# Iterate through file list backwards and delete all non fasta files.
|
|
47
|
-
for i in range(len(files) - 1, -1, -1):
|
|
48
|
-
file = files[i].split(".")
|
|
49
|
-
if file[-1] in fasta_endings:
|
|
50
|
-
continue
|
|
51
|
-
else:
|
|
52
|
-
del files[i]
|
|
53
|
-
|
|
54
|
-
return files
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def get_accessions(file_names: list[str]) -> list[str]:
|
|
58
|
-
"""Extract accessions from file names.
|
|
59
|
-
|
|
60
|
-
:param files: List of file names.
|
|
61
|
-
:type files: list[str]
|
|
62
|
-
:return: List of all accessions.
|
|
63
|
-
:rtype: list[str]
|
|
64
|
-
"""
|
|
65
|
-
accessions = []
|
|
66
|
-
for idx, file in enumerate(file_names):
|
|
67
|
-
accessions.append(file.split("_"))
|
|
68
|
-
accessions[idx] = accessions[idx][0] + "_" + accessions[idx][1]
|
|
69
|
-
|
|
70
|
-
return accessions
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def get_file_paths(base_path: Path, file_names: list[str]) -> list[Path]:
|
|
74
|
-
"""Make a list with the paths to the files.
|
|
75
|
-
|
|
76
|
-
:param base_path: Path of the parent directory.
|
|
77
|
-
:type base_path: Path
|
|
78
|
-
:param files: List of file names.
|
|
79
|
-
:type files: list[str]
|
|
80
|
-
:return: A list with all file paths.
|
|
81
|
-
:rtype: list[Path]
|
|
82
|
-
"""
|
|
83
|
-
return [base_path / file for file in file_names]
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def get_species_names(file_paths: list[Path]):
|
|
87
|
-
"""Extracts the species names.
|
|
88
|
-
|
|
89
|
-
:param file_paths: List with the file paths.
|
|
90
|
-
:type file_paths: list[Path]
|
|
91
|
-
:return: List with all species names.
|
|
92
|
-
"""
|
|
93
|
-
names = list()
|
|
94
|
-
for path in file_paths:
|
|
95
|
-
header = getline(str(path), 1)
|
|
96
|
-
name = header.replace("\n", "").replace(">", "")
|
|
97
|
-
if not name.isdigit():
|
|
98
|
-
logger.error(
|
|
99
|
-
"The header of file: {path} does not contain a correct ID: {name}. The ID needs to be "
|
|
100
|
-
"just numbers"
|
|
101
|
-
)
|
|
102
|
-
logger.error("Aborting")
|
|
103
|
-
exit()
|
|
104
|
-
names.append(name)
|
|
105
|
-
return names
|
|
8
|
+
from Bio import SeqIO
|
|
9
|
+
from xspect.definitions import fasta_endings, fastq_endings
|
|
106
10
|
|
|
107
11
|
|
|
108
12
|
def delete_zip_files(dir_path):
|
|
@@ -129,7 +33,7 @@ def extract_zip(zip_path, unzipped_path):
|
|
|
129
33
|
|
|
130
34
|
|
|
131
35
|
def concatenate_meta(path: Path, genus: str):
|
|
132
|
-
"""Concatenates all
|
|
36
|
+
"""Concatenates all species files to one fasta file.
|
|
133
37
|
|
|
134
38
|
:param path: Path to the directory with the concatenated fasta files.
|
|
135
39
|
:type path: Path
|
|
@@ -137,20 +41,47 @@ def concatenate_meta(path: Path, genus: str):
|
|
|
137
41
|
:type genus: str
|
|
138
42
|
"""
|
|
139
43
|
files_path = path / "concatenate"
|
|
140
|
-
fasta_endings = ["fasta", "fna", "fa", "ffn", "frn"]
|
|
141
44
|
meta_path = path / (genus + ".fasta")
|
|
142
45
|
files = os.listdir(files_path)
|
|
143
46
|
|
|
144
|
-
with open(meta_path, "w") as meta_file:
|
|
47
|
+
with open(meta_path, "w", encoding="utf-8") as meta_file:
|
|
145
48
|
# Write the header.
|
|
146
49
|
meta_header = f">{genus} metagenome\n"
|
|
147
50
|
meta_file.write(meta_header)
|
|
148
51
|
|
|
149
52
|
# Open each concatenated species file and write the sequence in the meta file.
|
|
150
53
|
for file in files:
|
|
151
|
-
file_ending = str(file).
|
|
54
|
+
file_ending = str(file).rsplit(".", maxsplit=1)[-1]
|
|
152
55
|
if file_ending in fasta_endings:
|
|
153
|
-
with open(
|
|
56
|
+
with open(
|
|
57
|
+
(files_path / str(file)), "r", encoding="utf-8"
|
|
58
|
+
) as species_file:
|
|
154
59
|
for line in species_file:
|
|
155
60
|
if line[0] != ">":
|
|
156
61
|
meta_file.write(line.replace("\n", ""))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_record_iterator(file_path: Path):
|
|
65
|
+
"""Returns a record iterator for a fasta or fastq file."""
|
|
66
|
+
if not isinstance(file_path, Path):
|
|
67
|
+
raise ValueError("Path must be a Path object")
|
|
68
|
+
|
|
69
|
+
if not file_path.exists():
|
|
70
|
+
raise ValueError("File does not exist")
|
|
71
|
+
|
|
72
|
+
if not file_path.is_file():
|
|
73
|
+
raise ValueError("Path must be a file")
|
|
74
|
+
|
|
75
|
+
if file_path.suffix[1:] in fasta_endings:
|
|
76
|
+
return SeqIO.parse(file_path, "fasta")
|
|
77
|
+
|
|
78
|
+
if file_path.suffix[1:] in fastq_endings:
|
|
79
|
+
return SeqIO.parse(file_path, "fastq")
|
|
80
|
+
|
|
81
|
+
raise ValueError("Invalid file format, must be a fasta or fastq file")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_records_by_id(file: Path, ids: list[str]):
|
|
85
|
+
"""Return records with the specified ids."""
|
|
86
|
+
records = get_record_iterator(file)
|
|
87
|
+
return [record for record in records if record.id in ids]
|