XspecT 0.2.7__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of XspecT might be problematic. Click here for more details.

xspect/train.py CHANGED
@@ -2,266 +2,282 @@
2
2
  This module contains the main functions for training the models.
3
3
  """
4
4
 
5
- import os
6
5
  import shutil
7
6
  from pathlib import Path
8
- import sys
9
- from time import localtime, perf_counter, asctime, sleep
7
+ from tempfile import TemporaryDirectory
10
8
  from loguru import logger
11
- from xspect.definitions import get_xspect_model_path, get_xspect_tmp_path
12
- from xspect.file_io import concatenate_meta
9
+ from xspect.definitions import get_xspect_model_path
10
+ from xspect.file_io import (
11
+ concatenate_species_fasta_files,
12
+ concatenate_metagenome,
13
+ extract_zip,
14
+ get_ncbi_dataset_accession_paths,
15
+ )
16
+ from xspect.models.probabilistic_filter_model import ProbabilisticFilterModel
13
17
  from xspect.models.probabilistic_filter_svm_model import ProbabilisticFilterSVMModel
14
18
  from xspect.models.probabilistic_single_filter_model import (
15
19
  ProbabilisticSingleFilterModel,
16
20
  )
17
- from xspect.train_filter.ncbi_api import (
18
- ncbi_assembly_metadata,
19
- ncbi_taxon_metadata,
20
- ncbi_children_tree,
21
- download_assemblies,
22
- )
23
- from xspect.train_filter import (
24
- create_svm,
25
- extract_and_concatenate,
26
- )
27
-
28
-
29
- def check_user_input(user_input: str):
30
- """The given input of the user will be checked. The input has to be a genus in NCBI.
31
-
32
- :return: The genus name.
21
+ from xspect.ncbi import AssemblySource, NCBIHandler
22
+
23
+
24
+ def train_from_directory(
25
+ display_name: str,
26
+ dir_path: Path,
27
+ meta: bool = False,
28
+ training_accessions: dict[str, list[str]] = None,
29
+ svm_accessions: list[str] = None,
30
+ svm_step: int = 1,
31
+ translation_dict: dict[str, str] = None,
32
+ author: str = None,
33
+ author_email: str = None,
34
+ ):
33
35
  """
34
- taxon_metadata = ncbi_taxon_metadata.NCBITaxonMetadata([user_input])
35
- all_metadata = taxon_metadata.get_metadata()
36
- for metadata in all_metadata.values():
37
- sci_name = metadata["sci_name"]
38
- tax_id = metadata["tax_id"]
39
- rank = metadata["rank"]
40
- lineage = metadata["lineage"]
41
- bacteria_id = 2
42
- if user_input not in (sci_name, tax_id):
43
- print(
44
- f"{get_current_time()}| The given genus: {user_input} was found as"
45
- f" genus: {sci_name} ID: {tax_id}"
46
- )
47
- print(f"{get_current_time()}| Using {sci_name} as genus name.")
48
- if rank == "GENUS":
49
- if bacteria_id not in lineage:
50
- print(f"{get_current_time()}| The given genus is not a bacteria.")
51
- print(f"{get_current_time()}| Do you want to continue: [y/n]")
52
- choice = input("-> ").lower()
53
- if choice == "y":
54
- return str(sci_name)
55
- print(f"{get_current_time()}| Exiting...")
56
- sys.exit()
57
- return str(sci_name)
58
- print(f"{get_current_time()}| {user_input} is rank {rank} and not genus.")
59
- sys.exit()
60
-
61
-
62
- def set_logger(dir_name: str):
63
- """Sets the logger parameters.
64
-
65
- :param dir_name: Name of the folder where the log should be saved.
36
+ Train a model from a directory containing training data.
37
+
38
+ This function trains a probabilistic filter model using the data in the specified directory.
39
+ The training data should be organized in the following way:
40
+ - dir_path
41
+ - cobs
42
+ - <species_name_1>
43
+ - <fasta_file_1>
44
+ - <fasta_file_2>
45
+ - <species_name_2>
46
+ - <fasta_file_1>
47
+ - <fasta_file_2>
48
+ - svm (optional)
49
+ - <species_name_1>
50
+ - <svm_file_1>
51
+ - <svm_file_2>
52
+ - <species_name_2>
53
+ - <svm_file_1>
54
+ - <svm_file_2>
55
+ If no SVM directory is found, the model will be trained without SVM.
56
+ The training data should be in FASTA format. The model is saved to the xspect_data directory.
57
+
58
+ Args:
59
+ display_name (str): Name of the model to be trained.
60
+ dir_path (Path): Path to the directory containing training data.
61
+ meta (bool, optional): Whether to train a metagenome model. Defaults to False.
62
+ training_accessions (list[str], optional): List of training accessions. Defaults to None.
63
+ svm_accessions (list[str], optional): List of SVM accession identifiers. Defaults to None.
64
+ svm_step (int, optional): Step size for SVM training. Defaults to 1.
65
+ translation_dict (dict[str, str], optional): Dictionary for display names. Defaults to None.
66
+ author (str, optional): Author of the model. Defaults to None.
67
+ author_email (str, optional): Author's email. Defaults to None.
68
+
69
+ Raises:
70
+ TypeError: If `display_name` is not a string.
71
+ TypeError: If `dir_path` is not a Path object to a valid directory.
72
+ ValueError: If the "cobs" directory is not found in `dir_path`.
73
+ ValueError: If no folders are found in the "cobs" directory.
74
+ ValueError: If the number of SVM folders does not match the number of COBS folders.
75
+ ValueError: If the names of COBS folders and SVM folders do not match.
76
+ ValueError: If no FASTA files are found in a COBS folder.
77
+
78
+ Notes:
79
+ - If the "svm" directory is not found, the model will be trained without SVM.
80
+ - Temporary directories are used for intermediate processing.
66
81
  """
67
- genus = dir_name.split("_")[0]
68
-
69
- # Starting logger.
70
- logger.remove()
71
- logger.add(sys.stderr, format="{time:HH:mm:ss} | {level} | {message}", level="INFO")
72
- log_path = get_xspect_tmp_path() / dir_name / (genus + ".log")
73
- logger.add(log_path, format="{time:HH:mm:ss} | {level} | {message}", level="DEBUG")
74
-
75
82
 
76
- def create_translation_dict(dir_name: str) -> dict[str, str]:
77
- """Create a translation dictionary to translate the taxon ID to its scientific name.
83
+ if not isinstance(display_name, str):
84
+ raise TypeError("display_name must be a string")
78
85
 
79
- :param dir_name: Directory name for current genus.
80
- :return: The created translation dictionary.
81
- """
82
- path = get_xspect_tmp_path() / dir_name / "concatenate"
83
- files = os.listdir(path)
84
- translation_dict = {}
85
- for file in files:
86
- file_split = file.split(".")[0].split("_")
87
- tax_id = file_split[0]
88
- final_file_name = tax_id + ".fasta"
89
- name = file_split[1]
90
- translation_dict[final_file_name] = name
86
+ if not isinstance(dir_path, Path) and dir_path.exists() and dir_path.is_dir():
87
+ raise TypeError("dir must be Path object to a valid directory")
88
+
89
+ cobs_training_path = dir_path / "cobs"
90
+ if not cobs_training_path.exists():
91
+ raise ValueError("cobs directory not found")
92
+
93
+ cobs_folders = [f for f in cobs_training_path.iterdir() if f.is_dir()]
94
+ if len(cobs_folders) == 0:
95
+ raise ValueError("no folders found in cobs directory")
96
+
97
+ svm_path = dir_path / "svm"
98
+ if svm_path.exists():
99
+ svm_folders = [f for f in svm_path.iterdir() if f.is_dir()]
100
+ if len(svm_folders) != len(cobs_folders):
101
+ raise ValueError(
102
+ "number of svm folders does not match number of cobs folders"
103
+ )
91
104
 
92
- return translation_dict
105
+ for cobs_folder, svm_folder in zip(cobs_folders, svm_folders):
106
+ if cobs_folder.name != svm_folder.name:
107
+ raise ValueError("cobs folder and svm folder names do not match")
108
+ else:
109
+ print("SVM directory not found. Model will be trained without SVM.")
110
+
111
+ with TemporaryDirectory() as tmp_dir:
112
+ tmp_dir = Path(tmp_dir)
113
+ species_dir = tmp_dir / "species"
114
+ species_dir.mkdir(parents=True, exist_ok=True)
115
+
116
+ # concatenate files in cobs_training_data for each species
117
+ concatenate_species_fasta_files(cobs_folders, species_dir)
118
+
119
+ if svm_path.exists():
120
+ species_model = ProbabilisticFilterSVMModel(
121
+ k=21,
122
+ model_display_name=display_name,
123
+ author=author,
124
+ author_email=author_email,
125
+ model_type="Species",
126
+ base_path=get_xspect_model_path(),
127
+ kernel="rbf",
128
+ c=1.0,
129
+ )
130
+ species_model.fit(
131
+ species_dir,
132
+ svm_path,
133
+ display_names=translation_dict,
134
+ svm_step=svm_step,
135
+ training_accessions=training_accessions,
136
+ svm_accessions=svm_accessions,
137
+ )
138
+ else:
139
+ species_model = ProbabilisticFilterModel(
140
+ k=21,
141
+ model_display_name=display_name,
142
+ author=author,
143
+ author_email=author_email,
144
+ model_type="Species",
145
+ base_path=get_xspect_model_path(),
146
+ )
147
+ species_model.fit(
148
+ species_dir,
149
+ display_names=translation_dict,
150
+ training_accessions=training_accessions,
151
+ )
93
152
 
153
+ species_model.save()
94
154
 
95
- def change_bf_assembly_file_names(dir_name: str):
96
- """Change all concatenated assembly names to only the taxon ID.
155
+ if meta:
156
+ meta_fasta = tmp_dir / f"{display_name}.fasta"
157
+ concatenate_metagenome(species_dir, meta_fasta)
97
158
 
98
- :param dir_name: Directory name for current genus.
159
+ genus_model = ProbabilisticSingleFilterModel(
160
+ k=21,
161
+ model_display_name=display_name,
162
+ author=author,
163
+ author_email=author_email,
164
+ model_type="Genus",
165
+ base_path=get_xspect_model_path(),
166
+ )
167
+ genus_model.fit(
168
+ meta_fasta,
169
+ display_name,
170
+ training_accessions=(
171
+ sum(training_accessions.values(), [])
172
+ if training_accessions
173
+ else None
174
+ ),
175
+ )
176
+ genus_model.save()
177
+
178
+
179
+ def train_from_ncbi(
180
+ genus: str,
181
+ svm_step: int = 1,
182
+ author: str = None,
183
+ author_email: str = None,
184
+ ):
185
+ """Train a model using NCBI assembly data for a given genus.
186
+
187
+ This function trains a probabilistic filter model using the assembly data from NCBI.
188
+ The training data is downloaded and processed, and the model is saved to the
189
+ xspect_data directory.
190
+
191
+ Args:
192
+ genus (str): Genus name for which the model will be trained.
193
+ svm_step (int, optional): Step size for SVM training. Defaults to 1.
194
+ author (str, optional): Author of the model. Defaults to None.
195
+ author_email (str, optional): Author's email. Defaults to None.
196
+
197
+ Raises:
198
+ TypeError: If `genus` is not a string.
199
+ ValueError: If no species with accessions are found.
200
+
201
+ Notes:
202
+ - The function uses NCBI API to fetch assembly metadata.
203
+ - Temporary directories are used for intermediate processing.
99
204
  """
100
- path = get_xspect_tmp_path() / dir_name / "concatenate"
101
- files = os.listdir(path)
102
- for file in files:
103
- file_split = file.split(".")[0].split("_")
104
- tax_id = file_split[0]
105
- new_file_name = f"{tax_id}.fasta"
106
- os.rename((path / file), (path / new_file_name))
107
-
108
-
109
- def get_current_time():
110
- """Returns the current time in the form hh:mm:ss."""
111
- return asctime(localtime()).split()[3]
112
-
113
-
114
- def train_ncbi(genus: str, svm_step: int = 1):
115
- """Train genus and species models with NCBI assemblies from the given genus."""
116
-
117
205
  if not isinstance(genus, str):
118
206
  raise TypeError("genus must be a string")
119
207
 
120
- # Check user input.
121
- genus = check_user_input(user_input=genus)
122
-
123
- # The directory name is defined in the following format: 'genus'_DD_MM_YYYY_hh-mm-ss
124
- curr_time = localtime()
125
- dir_name = f"{genus}_{curr_time[2]}_{curr_time[1]}_{curr_time[0]}_{curr_time[3]}-{curr_time[4]}-{curr_time[5]}"
126
-
127
- # Set the logger.
128
- set_logger(dir_name)
129
-
130
- # Time for the whole program.
131
- start = perf_counter()
132
-
133
- # Search for every defined species of the genus.
134
- logger.info("Getting all species of the genus")
135
- children_ids = ncbi_children_tree.NCBIChildrenTree(genus).children_ids()
136
- species_dict = ncbi_taxon_metadata.NCBITaxonMetadata(children_ids).get_metadata()
137
-
138
- # Look for up to 8 assembly accessions per species.
139
- logger.info("Getting assembly metadata")
140
- all_metadata = ncbi_assembly_metadata.NCBIAssemblyMetadata(
141
- all_metadata=species_dict, count=8, contig_n50=10000
142
- )
143
- all_metadata = all_metadata.get_all_metadata()
144
-
145
- # Ensure that the genus has at least one species with accessions.
146
- if not all_metadata:
147
- raise ValueError("No species with accessions found")
148
-
149
- # Download the chosen assemblies.
150
- # One file for each species with it's downloaded assemblies in zip format.
151
-
152
- # Iterate through all species.
153
- logger.info("Downloading assemblies for bloomfilter training")
154
- for metadata in all_metadata.values():
155
- # Only try to download when the species has accessions.
156
- if len(metadata["accessions"]) >= 1:
157
- sleep(5)
158
- species_name = metadata["sci_name"]
159
- tax_id = metadata["tax_id"]
160
- logger.info("Downloading {id}_{name}", id=tax_id, name=species_name)
161
- file_name = f"{tax_id}_{species_name}.zip"
162
-
163
- # Selecting the first 4 assemblies for training the filters.
164
- accessions = metadata["accessions"][:4]
165
-
166
- download_assemblies.download_assemblies(
167
- accessions=accessions,
168
- dir_name=dir_name,
169
- target_folder="zip_files",
170
- zip_file_name=file_name,
171
- )
172
- logger.info("Concatenating and extracting")
173
-
174
- # Concatenate all assemblies of each species.
175
- extract_and_concatenate.bf(dir_name=dir_name, delete=True)
176
- concatenate_meta(get_xspect_tmp_path() / dir_name, genus)
208
+ ncbi_handler = NCBIHandler()
209
+ genus_tax_id = ncbi_handler.get_genus_taxon_id(genus)
210
+ species_ids = ncbi_handler.get_species(genus_tax_id)
211
+ species_names = ncbi_handler.get_taxon_names(species_ids)
212
+
213
+ filtered_species_ids = [
214
+ tax_id
215
+ for tax_id in species_ids
216
+ if "candidatus" not in species_names[tax_id].lower()
217
+ and " sp." not in species_names[tax_id].lower()
218
+ ]
219
+ filtered_species_names = {
220
+ str(tax_id): species_names[tax_id] for tax_id in filtered_species_ids
221
+ }
177
222
 
178
- # Download assemblies for svm creation.
179
- logger.info("Downloading assemblies for support-vector-machine training")
180
223
  accessions = {}
181
- for metadata in all_metadata.values():
182
- # Only add taxon with accessions.
183
- if len(metadata["accessions"]) >= 1:
184
- accessions[metadata["tax_id"]] = metadata["accessions"]
185
-
186
- # Downloading assemblies.
187
- create_svm.get_svm_assemblies(all_accessions=accessions, dir_name=dir_name)
188
-
189
- logger.info("Extracting SVM assemblies")
190
-
191
- # Extracting assemblies.
192
- extract_and_concatenate.svm(
193
- species_accessions=accessions, dir_name=dir_name, delete=True
194
- )
195
-
196
- # Make dictionary for translating taxon ID to scientific name.
197
- translation_dict = create_translation_dict(dir_name)
198
- change_bf_assembly_file_names(dir_name)
199
-
200
- species_files_path = get_xspect_tmp_path() / dir_name / "concatenate"
201
- species_result_path = get_xspect_model_path() / genus
202
-
203
- # Train Bloomfilter for complete genus.
204
- logger.info("Training metagenome model")
205
- mg_files_path = get_xspect_tmp_path() / dir_name
206
-
207
- genus_model = ProbabilisticSingleFilterModel(
208
- k=21,
209
- model_display_name=genus,
210
- author="Test",
211
- author_email="test@example.com",
212
- model_type="Genus",
213
- base_path=Path(species_result_path).parent,
214
- )
215
- genus_model.fit(mg_files_path / f"{genus}.fasta", genus)
216
- genus_model.save()
217
-
218
- logger.info("Training species model")
219
-
220
- species_model = ProbabilisticFilterSVMModel(
221
- k=21,
222
- model_display_name=genus,
223
- author="Test",
224
- author_email="test@example.com",
225
- model_type="Species",
226
- base_path=Path(species_result_path).parent,
227
- kernel="rbf",
228
- c=1.0,
229
- )
230
- svm_dir = get_xspect_tmp_path() / dir_name / "training_data"
231
- species_model.fit(
232
- Path(species_files_path),
233
- svm_dir,
234
- display_names=translation_dict,
235
- svm_step=svm_step,
236
- )
237
- species_model.save()
238
-
239
- # Cleanup files.
240
- shutil.rmtree(get_xspect_tmp_path() / dir_name)
241
-
242
- end = perf_counter()
243
-
244
- logger.info("Program runtime: {time} m", time=(round((end - start) / 60, 2)))
245
- logger.info("XspecT-trainer is finished.")
246
-
247
-
248
- def train_from_directory(display_name: str, dir_path: Path, meta: bool = False):
249
- """Train the gene family and gene filter.
250
-
251
- :param display_name: Name of the model.
252
- :param dir: Input directory.
253
- """
254
-
255
- if not isinstance(display_name, str):
256
- raise TypeError("display_name must be a string")
257
-
258
- if not isinstance(dir_path, Path) and dir_path.exists() and dir_path.is_dir():
259
- raise ValueError("dir must be Path object to a valid directory")
260
-
261
- # check if the directory contains the necessary files
262
- # copy to temp path
263
- # check if svm training data exists
264
- # train model, with svm data if it exists
265
- # add display names
266
- # train metagenome model
267
- # clean up temp path
224
+ for tax_id in filtered_species_ids:
225
+ taxon_accessions = ncbi_handler.get_highest_quality_accessions(
226
+ tax_id, AssemblySource.REFSEQ, 8
227
+ )
228
+ if not taxon_accessions:
229
+ logger.warning(f"No assemblies found for tax_id {tax_id}. Skipping.")
230
+ filtered_species_names.pop(str(tax_id), None)
231
+ continue
232
+ accessions[tax_id] = taxon_accessions
233
+
234
+ if not accessions:
235
+ raise ValueError(
236
+ "No species with accessions found. Please check the genus name."
237
+ )
238
+
239
+ with TemporaryDirectory() as tmp_dir:
240
+ tmp_dir = Path(tmp_dir)
241
+ cobs_dir = tmp_dir / "cobs"
242
+ svm_dir = tmp_dir / "svm"
243
+ cobs_dir.mkdir(parents=True, exist_ok=True)
244
+ svm_dir.mkdir(parents=True, exist_ok=True)
245
+
246
+ ncbi_handler.download_assemblies(
247
+ accessions=sum(accessions.values(), []), output_dir=tmp_dir
248
+ )
249
+ extract_zip(tmp_dir, tmp_dir)
250
+ accession_paths = get_ncbi_dataset_accession_paths(tmp_dir / "ncbi_dataset")
251
+
252
+ # select accessions
253
+ cobs_accessions = {}
254
+ svm_accessions = {}
255
+ for tax_id, accession_list in accessions.items():
256
+ cobs_accessions[tax_id] = accession_list[:4]
257
+ svm_accessions[tax_id] = accession_list[-4:]
258
+
259
+ # move files
260
+ for tax_id, accession_list in cobs_accessions.items():
261
+ tax_id_dir = cobs_dir / str(tax_id)
262
+ tax_id_dir.mkdir(parents=True, exist_ok=True)
263
+ for accession in accession_list:
264
+ accession_path = accession_paths[accession]
265
+ shutil.copy(accession_path, tax_id_dir / f"{accession}.fasta")
266
+ for tax_id, accession_list in svm_accessions.items():
267
+ tax_id_dir = svm_dir / str(tax_id)
268
+ tax_id_dir.mkdir(parents=True, exist_ok=True)
269
+ for accession in accession_list:
270
+ accession_path = accession_paths[accession]
271
+ shutil.copy(accession_path, tax_id_dir / f"{accession}.fasta")
272
+
273
+ train_from_directory(
274
+ display_name=genus,
275
+ dir_path=tmp_dir,
276
+ meta=True,
277
+ training_accessions=cobs_accessions,
278
+ svm_accessions=svm_accessions,
279
+ svm_step=svm_step,
280
+ translation_dict=filtered_species_names,
281
+ author=author,
282
+ author_email=author_email,
283
+ )
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: XspecT
3
- Version: 0.2.7
3
+ Version: 0.4.1
4
4
  Summary: Tool to monitor and characterize pathogens using Bloom filters.
5
5
  License: MIT License
6
6
 
@@ -54,34 +54,28 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
54
54
  Provides-Extra: test
55
55
  Requires-Dist: pytest; extra == "test"
56
56
  Requires-Dist: pytest-cov; extra == "test"
57
+ Requires-Dist: httpx; extra == "test"
58
+ Dynamic: license-file
57
59
 
58
60
  # XspecT - Acinetobacter Species Assignment Tool
61
+ <!-- start intro -->
59
62
  ![Test](https://github.com/bionf/xspect2/actions/workflows/test.yml/badge.svg)
60
63
  [![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
61
64
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
62
65
 
63
- <img src="/docs/img/logo.png" height="50%" width="50%">
64
-
65
- <!-- start intro -->
66
- XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
67
- <br/><br/>
68
-
69
- XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction the results are classified using a Support Vector Machine.
70
- <br/>
66
+ XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
71
67
 
72
- Local extensions of the reference database are supported.
73
- <br/>
68
+ XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom filters ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
74
69
 
75
- The tool is available as a web-based application and a smaller command line interface.
70
+ The tool is available as a web-based application and as a command line interface.
76
71
 
77
- [Bloom Filters]: https://en.wikipedia.org/wiki/Bloom_filter
72
+ [kmer indices]: https://arxiv.org/abs/1905.09624
78
73
  [Support Vector Machine]: https://en.wikipedia.org/wiki/Support-vector_machine
79
- [blaOxa-genes]: https://en.wikipedia.org/wiki/Beta-lactamase#OXA_beta-lactamases_(class_D)
80
74
  <!-- end intro -->
81
75
 
82
76
  <!-- start quickstart -->
83
77
  ## Installation
84
- To install Xspect, please download the lastest 64 bit Python version and install the package using pip:
78
+ To install XspecT, please download the latest 64-bit Python version and install the package using pip:
85
79
  ```
86
80
  pip install xspect
87
81
  ```
@@ -91,23 +85,23 @@ Please note that Windows and Alpine Linux is currently not supported.
91
85
  ### Get the models
92
86
  To download basic pre-trained models, you can use the built-in command:
93
87
  ```
94
- xspect download-models
88
+ xspect models download
95
89
  ```
96
90
  Additional species models can be trained using:
97
91
  ```
98
- xspect train-species you-ncbi-genus-name
92
+ xspect models train ncbi
99
93
  ```
100
94
 
101
95
  ### How to run the web app
102
96
  To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
103
97
  ```
104
- xspect api
98
+ xspect web
105
99
  ```
106
100
 
107
101
  ### How to use the XspecT command line interface
108
- Run xspect with the configuration you want to run it with as arguments.
102
+ Run XspecT with the configuration you want to run it with as arguments.
109
103
  ```
110
- xspect classify-species your-genus path/to/your/input-set
104
+ xspect classify species
111
105
  ```
112
106
  For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
113
107
  ```
@@ -0,0 +1,24 @@
1
+ xspect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ xspect/definitions.py,sha256=fVn_li_s2hriOSGJ69o_H8H-vkw1znvkryhBj7WMnF4,1219
3
+ xspect/download_models.py,sha256=y1wFJZa1xOJfvUP78zKkRs46O-WqKBL90vmo5AYUio0,853
4
+ xspect/fastapi.py,sha256=DOef3MqWPdBmdYBo8Z9SPmWrbJHOsQxQe3GrC4f__Rc,3165
5
+ xspect/file_io.py,sha256=YmfoKEQdHHEi8dO2G5Kt4tSNi5LuWW0VZ74pyYRHiTo,5937
6
+ xspect/main.py,sha256=twIn48wPDFOEejQroYN8JM8a40naEqT_BUgrMAwwYck,12154
7
+ xspect/model_management.py,sha256=UbmAr3YOZ4oy_9iVvApCLstYHGkcmneHEC_yftRIqCI,3010
8
+ xspect/ncbi.py,sha256=sSJO3g8n89Qw6UJjAy13bpjOcIGSquTKNKVHNUMbDeM,10072
9
+ xspect/train.py,sha256=7I7-inWGJe_VDzII9dLZ8U-8SUCZDIrhb-eNOZEyfss,10703
10
+ xspect/mlst_feature/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ xspect/mlst_feature/mlst_helper.py,sha256=3zhhEomkk-qiObbQ82TM_YHuyVCJ7_XgyzsYM_4TS0E,5760
12
+ xspect/mlst_feature/pub_mlst_handler.py,sha256=oss3CkJNt6041p3qnMdOfoX8ZgUfpB93CUim-Yakc9A,5031
13
+ xspect/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ xspect/models/probabilistic_filter_mlst_model.py,sha256=JMc0yBJPo7J9b-GpvhDmzhwWPIKOwatAq0edDgM72PE,11735
15
+ xspect/models/probabilistic_filter_model.py,sha256=l8mhcRgHPso7qIgI56buCnE3ZleO3gPWOZEpgrycOBA,10029
16
+ xspect/models/probabilistic_filter_svm_model.py,sha256=xXimcv3iWnG1JiFyrk6UqkP9hFIxWGDdb__fRdQYwro,6245
17
+ xspect/models/probabilistic_single_filter_model.py,sha256=yxWnCt4IP-3ZRLP4pRA3f2VTHc0_4g17PDCyOFayDDg,4090
18
+ xspect/models/result.py,sha256=ELWiDlQPlxNG7ceLpth60Z_Hb1ZdopDJ3vgHBPgSRm8,3989
19
+ xspect-0.4.1.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
20
+ xspect-0.4.1.dist-info/METADATA,sha256=BLTAPyNGEjUxxFUqGvdgyFy8T0p9b8w8IOBbUBvnv28,4477
21
+ xspect-0.4.1.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
22
+ xspect-0.4.1.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
23
+ xspect-0.4.1.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
24
+ xspect-0.4.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (80.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5