XspecT 0.2.6__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of XspecT might be problematic. Click here for more details.

xspect/train.py CHANGED
@@ -2,271 +2,282 @@
2
2
  This module contains the main functions for training the models.
3
3
  """
4
4
 
5
- import os
6
5
  import shutil
7
6
  from pathlib import Path
8
- import sys
9
- from time import localtime, perf_counter, asctime, sleep
7
+ from tempfile import TemporaryDirectory
10
8
  from loguru import logger
11
- from xspect.definitions import get_xspect_model_path, get_xspect_tmp_path
12
- from xspect.file_io import concatenate_meta
9
+ from xspect.definitions import get_xspect_model_path
10
+ from xspect.file_io import (
11
+ concatenate_species_fasta_files,
12
+ concatenate_metagenome,
13
+ extract_zip,
14
+ get_ncbi_dataset_accession_paths,
15
+ )
16
+ from xspect.models.probabilistic_filter_model import ProbabilisticFilterModel
13
17
  from xspect.models.probabilistic_filter_svm_model import ProbabilisticFilterSVMModel
14
18
  from xspect.models.probabilistic_single_filter_model import (
15
19
  ProbabilisticSingleFilterModel,
16
20
  )
17
- from xspect.train_filter.ncbi_api import (
18
- ncbi_assembly_metadata,
19
- ncbi_taxon_metadata,
20
- ncbi_children_tree,
21
- download_assemblies,
22
- )
23
- from xspect.train_filter import (
24
- create_svm,
25
- html_scrap,
26
- extract_and_concatenate,
27
- )
28
-
29
-
30
- def check_user_input(user_input: str):
31
- """The given input of the user will be checked. The input has to be a genus in NCBI.
32
-
33
- :return: The genus name.
21
+ from xspect.ncbi import AssemblySource, NCBIHandler
22
+
23
+
24
def train_from_directory(
    display_name: str,
    dir_path: Path,
    meta: bool = False,
    training_accessions: dict[str, list[str]] = None,
    svm_accessions: dict[str, list[str]] = None,
    svm_step: int = 1,
    translation_dict: dict[str, str] = None,
    author: str = None,
    author_email: str = None,
):
    """
    Train a model from a directory containing training data.

    This function trains a probabilistic filter model using the data in the specified directory.
    The training data should be organized in the following way:
    - dir_path
        - cobs
            - <species_name_1>
                - <fasta_file_1>
                - <fasta_file_2>
            - <species_name_2>
                - <fasta_file_1>
                - <fasta_file_2>
        - svm (optional)
            - <species_name_1>
                - <svm_file_1>
                - <svm_file_2>
            - <species_name_2>
                - <svm_file_1>
                - <svm_file_2>
    If no SVM directory is found, the model will be trained without SVM.
    The training data should be in FASTA format. The model is saved to the xspect_data directory.

    Args:
        display_name (str): Name of the model to be trained.
        dir_path (Path): Path to the directory containing training data.
        meta (bool, optional): Whether to train a metagenome model. Defaults to False.
        training_accessions (dict[str, list[str]], optional): Training accessions per taxon.
            Defaults to None.
        svm_accessions (dict[str, list[str]], optional): SVM accessions per taxon.
            Defaults to None.
        svm_step (int, optional): Step size for SVM training. Defaults to 1.
        translation_dict (dict[str, str], optional): Dictionary for display names. Defaults to None.
        author (str, optional): Author of the model. Defaults to None.
        author_email (str, optional): Author's email. Defaults to None.

    Raises:
        TypeError: If `display_name` is not a string.
        TypeError: If `dir_path` is not a Path object to a valid directory.
        ValueError: If the "cobs" directory is not found in `dir_path`.
        ValueError: If no folders are found in the "cobs" directory.
        ValueError: If the number of SVM folders does not match the number of COBS folders.
        ValueError: If the names of COBS folders and SVM folders do not match.

    Notes:
        - If the "svm" directory is not found, the model will be trained without SVM.
        - Temporary directories are used for intermediate processing.
    """

    if not isinstance(display_name, str):
        raise TypeError("display_name must be a string")

    # Bug fix: the original condition chained with `and`, so a valid Path never
    # raised and a non-Path crashed with AttributeError on `.exists()` instead
    # of raising the documented TypeError. Reject non-Paths and invalid dirs.
    if not isinstance(dir_path, Path) or not dir_path.exists() or not dir_path.is_dir():
        raise TypeError("dir must be Path object to a valid directory")

    cobs_training_path = dir_path / "cobs"
    if not cobs_training_path.exists():
        raise ValueError("cobs directory not found")

    # Sort by name: Path.iterdir() yields entries in arbitrary order, so the
    # pairwise cobs/svm name comparison below would otherwise be nondeterministic.
    cobs_folders = sorted(
        (f for f in cobs_training_path.iterdir() if f.is_dir()),
        key=lambda f: f.name,
    )
    if len(cobs_folders) == 0:
        raise ValueError("no folders found in cobs directory")

    svm_path = dir_path / "svm"
    if svm_path.exists():
        svm_folders = sorted(
            (f for f in svm_path.iterdir() if f.is_dir()),
            key=lambda f: f.name,
        )
        if len(svm_folders) != len(cobs_folders):
            raise ValueError(
                "number of svm folders does not match number of cobs folders"
            )

        for cobs_folder, svm_folder in zip(cobs_folders, svm_folders):
            if cobs_folder.name != svm_folder.name:
                raise ValueError("cobs folder and svm folder names do not match")
    else:
        print("SVM directory not found. Model will be trained without SVM.")

    with TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)
        species_dir = tmp_dir / "species"
        species_dir.mkdir(parents=True, exist_ok=True)

        # concatenate files in cobs_training_data for each species
        concatenate_species_fasta_files(cobs_folders, species_dir)

        if svm_path.exists():
            species_model = ProbabilisticFilterSVMModel(
                k=21,
                model_display_name=display_name,
                author=author,
                author_email=author_email,
                model_type="Species",
                base_path=get_xspect_model_path(),
                kernel="rbf",
                c=1.0,
            )
            species_model.fit(
                species_dir,
                svm_path,
                display_names=translation_dict,
                svm_step=svm_step,
                training_accessions=training_accessions,
                svm_accessions=svm_accessions,
            )
        else:
            species_model = ProbabilisticFilterModel(
                k=21,
                model_display_name=display_name,
                author=author,
                author_email=author_email,
                model_type="Species",
                base_path=get_xspect_model_path(),
            )
            species_model.fit(
                species_dir,
                display_names=translation_dict,
                training_accessions=training_accessions,
            )

        species_model.save()

        if meta:
            # The genus-level ("metagenome") filter is trained on the
            # concatenation of all species FASTA files.
            meta_fasta = tmp_dir / f"{display_name}.fasta"
            concatenate_metagenome(species_dir, meta_fasta)

            genus_model = ProbabilisticSingleFilterModel(
                k=21,
                model_display_name=display_name,
                author=author,
                author_email=author_email,
                model_type="Genus",
                base_path=get_xspect_model_path(),
            )
            genus_model.fit(
                meta_fasta,
                display_name,
                training_accessions=(
                    sum(training_accessions.values(), [])
                    if training_accessions
                    else None
                ),
            )
            genus_model.save()
178
+
179
def train_from_ncbi(
    genus: str,
    svm_step: int = 1,
    author: str = None,
    author_email: str = None,
):
    """Train genus and species models from NCBI assemblies of a given genus.

    Assembly metadata and sequences are fetched through the NCBI API into a
    temporary directory, arranged into the cobs/svm layout expected by
    `train_from_directory`, and the resulting models are written to the
    xspect_data directory.

    Args:
        genus (str): Genus name for which the model will be trained.
        svm_step (int, optional): Step size for SVM training. Defaults to 1.
        author (str, optional): Author of the model. Defaults to None.
        author_email (str, optional): Author's email. Defaults to None.

    Raises:
        TypeError: If `genus` is not a string.
        ValueError: If no species with accessions are found.

    Notes:
        - Species whose names contain "candidatus" or " sp." are excluded.
        - Temporary directories are used for intermediate processing.
    """
    if not isinstance(genus, str):
        raise TypeError("genus must be a string")

    handler = NCBIHandler()
    genus_tax_id = handler.get_genus_taxon_id(genus)
    species_ids = handler.get_species(genus_tax_id)
    names_by_id = handler.get_taxon_names(species_ids)

    # Drop unnamed/provisional species before requesting assemblies.
    kept_ids = []
    for tax_id in species_ids:
        lowered = names_by_id[tax_id].lower()
        if "candidatus" in lowered or " sp." in lowered:
            continue
        kept_ids.append(tax_id)
    display_names = {str(tax_id): names_by_id[tax_id] for tax_id in kept_ids}

    accessions = {}
    for tax_id in kept_ids:
        taxon_accessions = handler.get_highest_quality_accessions(
            tax_id, AssemblySource.REFSEQ, 8
        )
        if taxon_accessions:
            accessions[tax_id] = taxon_accessions
        else:
            logger.warning(f"No assemblies found for tax_id {tax_id}. Skipping.")
            display_names.pop(str(tax_id), None)

    if not accessions:
        raise ValueError(
            "No species with accessions found. Please check the genus name."
        )

    with TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)
        cobs_dir = tmp_dir / "cobs"
        svm_dir = tmp_dir / "svm"
        cobs_dir.mkdir(parents=True, exist_ok=True)
        svm_dir.mkdir(parents=True, exist_ok=True)

        all_accessions = [acc for acc_list in accessions.values() for acc in acc_list]
        handler.download_assemblies(accessions=all_accessions, output_dir=tmp_dir)
        extract_zip(tmp_dir, tmp_dir)
        accession_paths = get_ncbi_dataset_accession_paths(tmp_dir / "ncbi_dataset")

        # Split each taxon's accessions: best four for the filters,
        # last four for SVM training.
        filter_accessions = {t: accs[:4] for t, accs in accessions.items()}
        svm_accessions = {t: accs[-4:] for t, accs in accessions.items()}

        # Stage the downloaded FASTA files into the expected layout.
        for tax_id, acc_list in filter_accessions.items():
            destination = cobs_dir / str(tax_id)
            destination.mkdir(parents=True, exist_ok=True)
            for accession in acc_list:
                shutil.copy(
                    accession_paths[accession], destination / f"{accession}.fasta"
                )
        for tax_id, acc_list in svm_accessions.items():
            destination = svm_dir / str(tax_id)
            destination.mkdir(parents=True, exist_ok=True)
            for accession in acc_list:
                shutil.copy(
                    accession_paths[accession], destination / f"{accession}.fasta"
                )

        train_from_directory(
            display_name=genus,
            dir_path=tmp_dir,
            meta=True,
            training_accessions=filter_accessions,
            svm_accessions=svm_accessions,
            svm_step=svm_step,
            translation_dict=display_names,
            author=author,
            author_email=author_email,
        )
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: XspecT
3
- Version: 0.2.6
3
+ Version: 0.4.0
4
4
  Summary: Tool to monitor and characterize pathogens using Bloom filters.
5
5
  License: MIT License
6
6
 
@@ -54,34 +54,27 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
54
54
  Provides-Extra: test
55
55
  Requires-Dist: pytest; extra == "test"
56
56
  Requires-Dist: pytest-cov; extra == "test"
57
+ Dynamic: license-file
57
58
 
58
59
  # XspecT - Acinetobacter Species Assignment Tool
60
+ <!-- start intro -->
59
61
  ![Test](https://github.com/bionf/xspect2/actions/workflows/test.yml/badge.svg)
60
62
  [![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
61
63
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
62
64
 
63
- <img src="/docs/img/logo.png" height="50%" width="50%">
64
-
65
- <!-- start intro -->
66
- XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
67
- <br/><br/>
68
-
69
- XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction the results are classified using a Support Vector Machine.
70
- <br/>
65
+ XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
71
66
 
72
- Local extensions of the reference database are supported.
73
- <br/>
67
+ XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom filters ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
74
68
 
75
- The tool is available as a web-based application and a smaller command line interface.
69
+ The tool is available as a web-based application and as a command line interface.
76
70
 
77
- [Bloom Filters]: https://en.wikipedia.org/wiki/Bloom_filter
71
+ [kmer indices]: https://arxiv.org/abs/1905.09624
78
72
  [Support Vector Machine]: https://en.wikipedia.org/wiki/Support-vector_machine
79
- [blaOxa-genes]: https://en.wikipedia.org/wiki/Beta-lactamase#OXA_beta-lactamases_(class_D)
80
73
  <!-- end intro -->
81
74
 
82
75
  <!-- start quickstart -->
83
76
  ## Installation
84
- To install Xspect, please download the lastest 64 bit Python version and install the package using pip:
77
+ To install XspecT, please download the latest 64 bit Python version and install the package using pip:
85
78
  ```
86
79
  pip install xspect
87
80
  ```
@@ -91,23 +84,23 @@ Please note that Windows and Alpine Linux is currently not supported.
91
84
  ### Get the models
92
85
  To download basic pre-trained models, you can use the built-in command:
93
86
  ```
94
- xspect download-models
87
+ xspect models download
95
88
  ```
96
89
  Additional species models can be trained using:
97
90
  ```
98
- xspect train-species you-ncbi-genus-name
91
+ xspect models train ncbi
99
92
  ```
100
93
 
101
94
  ### How to run the web app
102
95
  To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
103
96
  ```
104
- xspect api
97
+ xspect web
105
98
  ```
106
99
 
107
100
  ### How to use the XspecT command line interface
108
- Run xspect with the configuration you want to run it with as arguments.
101
+ Run XspecT with the configuration you want to run it with as arguments.
109
102
  ```
110
- xspect classify-species your-genus path/to/your/input-set
103
+ xspect classify species
111
104
  ```
112
105
  For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
113
106
  ```
@@ -0,0 +1,24 @@
1
+ xspect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ xspect/definitions.py,sha256=fVn_li_s2hriOSGJ69o_H8H-vkw1znvkryhBj7WMnF4,1219
3
+ xspect/download_models.py,sha256=y1wFJZa1xOJfvUP78zKkRs46O-WqKBL90vmo5AYUio0,853
4
+ xspect/fastapi.py,sha256=DOef3MqWPdBmdYBo8Z9SPmWrbJHOsQxQe3GrC4f__Rc,3165
5
+ xspect/file_io.py,sha256=YmfoKEQdHHEi8dO2G5Kt4tSNi5LuWW0VZ74pyYRHiTo,5937
6
+ xspect/main.py,sha256=uVj1fooDU5WW8sMug5YPwuAphb8zd3PDpNFNlTIyXBw,11155
7
+ xspect/model_management.py,sha256=LItMidbfxZfttEZHa8da_nnkwkH7XVLWDM0uVrFUZ0Q,3753
8
+ xspect/ncbi.py,sha256=sSJO3g8n89Qw6UJjAy13bpjOcIGSquTKNKVHNUMbDeM,10072
9
+ xspect/train.py,sha256=7I7-inWGJe_VDzII9dLZ8U-8SUCZDIrhb-eNOZEyfss,10703
10
+ xspect/mlst_feature/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ xspect/mlst_feature/mlst_helper.py,sha256=3zhhEomkk-qiObbQ82TM_YHuyVCJ7_XgyzsYM_4TS0E,5760
12
+ xspect/mlst_feature/pub_mlst_handler.py,sha256=oss3CkJNt6041p3qnMdOfoX8ZgUfpB93CUim-Yakc9A,5031
13
+ xspect/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ xspect/models/probabilistic_filter_mlst_model.py,sha256=JMc0yBJPo7J9b-GpvhDmzhwWPIKOwatAq0edDgM72PE,11735
15
+ xspect/models/probabilistic_filter_model.py,sha256=l8mhcRgHPso7qIgI56buCnE3ZleO3gPWOZEpgrycOBA,10029
16
+ xspect/models/probabilistic_filter_svm_model.py,sha256=xXimcv3iWnG1JiFyrk6UqkP9hFIxWGDdb__fRdQYwro,6245
17
+ xspect/models/probabilistic_single_filter_model.py,sha256=yxWnCt4IP-3ZRLP4pRA3f2VTHc0_4g17PDCyOFayDDg,4090
18
+ xspect/models/result.py,sha256=fhTS43XYAIkNiiAMyNpaif0kM4Ab3xLBnVJnutkOuFU,3400
19
+ xspect-0.4.0.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
20
+ xspect-0.4.0.dist-info/METADATA,sha256=mmsNmdiRqOC0RCBe7yW6oofue2OctwErCWVyiJD86nI,4439
21
+ xspect-0.4.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
22
+ xspect-0.4.0.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
23
+ xspect-0.4.0.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
24
+ xspect-0.4.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5