XspecT 0.5.0__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of XspecT might be problematic. Click here for more details.

Files changed (119) hide show
  1. {xspect-0.5.0 → xspect-0.5.1}/.github/workflows/test.yml +2 -0
  2. {xspect-0.5.0 → xspect-0.5.1}/PKG-INFO +11 -5
  3. {xspect-0.5.0 → xspect-0.5.1}/README.md +10 -4
  4. {xspect-0.5.0 → xspect-0.5.1}/pyproject.toml +1 -1
  5. {xspect-0.5.0 → xspect-0.5.1}/src/XspecT.egg-info/PKG-INFO +11 -5
  6. xspect-0.5.1/src/xspect/classify.py +67 -0
  7. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/definitions.py +11 -3
  8. xspect-0.5.1/src/xspect/filter_sequences.py +138 -0
  9. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/main.py +52 -32
  10. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/models/probabilistic_filter_model.py +7 -7
  11. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/models/probabilistic_filter_svm_model.py +7 -7
  12. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/models/probabilistic_single_filter_model.py +7 -4
  13. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/ncbi.py +3 -2
  14. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/train.py +17 -10
  15. {xspect-0.5.0 → xspect-0.5.1}/tests/test_cli.py +5 -1
  16. {xspect-0.5.0 → xspect-0.5.1}/tests/test_ncbi.py +5 -1
  17. {xspect-0.5.0 → xspect-0.5.1}/tests/test_train.py +7 -2
  18. xspect-0.5.0/src/xspect/classify.py +0 -32
  19. xspect-0.5.0/src/xspect/filter_sequences.py +0 -56
  20. {xspect-0.5.0 → xspect-0.5.1}/.github/workflows/black.yml +0 -0
  21. {xspect-0.5.0 → xspect-0.5.1}/.github/workflows/docs.yml +0 -0
  22. {xspect-0.5.0 → xspect-0.5.1}/.github/workflows/pylint.yml +0 -0
  23. {xspect-0.5.0 → xspect-0.5.1}/.github/workflows/pypi.yml +0 -0
  24. {xspect-0.5.0 → xspect-0.5.1}/.gitignore +0 -0
  25. {xspect-0.5.0 → xspect-0.5.1}/LICENSE +0 -0
  26. {xspect-0.5.0 → xspect-0.5.1}/docs/cli.md +0 -0
  27. {xspect-0.5.0 → xspect-0.5.1}/docs/contributing.md +0 -0
  28. {xspect-0.5.0 → xspect-0.5.1}/docs/index.md +0 -0
  29. {xspect-0.5.0 → xspect-0.5.1}/docs/quickstart.md +0 -0
  30. {xspect-0.5.0 → xspect-0.5.1}/docs/understanding.md +0 -0
  31. {xspect-0.5.0 → xspect-0.5.1}/docs/web.md +0 -0
  32. {xspect-0.5.0 → xspect-0.5.1}/mkdocs.yml +0 -0
  33. {xspect-0.5.0 → xspect-0.5.1}/setup.cfg +0 -0
  34. {xspect-0.5.0 → xspect-0.5.1}/src/XspecT.egg-info/SOURCES.txt +0 -0
  35. {xspect-0.5.0 → xspect-0.5.1}/src/XspecT.egg-info/dependency_links.txt +0 -0
  36. {xspect-0.5.0 → xspect-0.5.1}/src/XspecT.egg-info/entry_points.txt +0 -0
  37. {xspect-0.5.0 → xspect-0.5.1}/src/XspecT.egg-info/requires.txt +0 -0
  38. {xspect-0.5.0 → xspect-0.5.1}/src/XspecT.egg-info/top_level.txt +0 -0
  39. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/__init__.py +0 -0
  40. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/download_models.py +0 -0
  41. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/file_io.py +0 -0
  42. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/mlst_feature/__init__.py +0 -0
  43. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/mlst_feature/mlst_helper.py +0 -0
  44. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/mlst_feature/pub_mlst_handler.py +0 -0
  45. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/model_management.py +0 -0
  46. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/models/__init__.py +0 -0
  47. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/models/probabilistic_filter_mlst_model.py +0 -0
  48. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/models/result.py +0 -0
  49. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/web.py +0 -0
  50. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/.gitignore +0 -0
  51. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/README.md +0 -0
  52. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/components.json +0 -0
  53. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/dist/assets/index-CMG4V7fZ.js +0 -0
  54. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/dist/assets/index-jIKg1HIy.css +0 -0
  55. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/dist/index.html +0 -0
  56. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/dist/vite.svg +0 -0
  57. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/eslint.config.js +0 -0
  58. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/index.html +0 -0
  59. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/package-lock.json +0 -0
  60. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/package.json +0 -0
  61. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/pnpm-lock.yaml +0 -0
  62. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/public/vite.svg +0 -0
  63. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/App.tsx +0 -0
  64. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/api.tsx +0 -0
  65. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/assets/react.svg +0 -0
  66. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/classification-form.tsx +0 -0
  67. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/classify.tsx +0 -0
  68. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/data-table.tsx +0 -0
  69. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/dropdown-checkboxes.tsx +0 -0
  70. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/dropdown-slider.tsx +0 -0
  71. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/filter-form.tsx +0 -0
  72. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/filter.tsx +0 -0
  73. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/header.tsx +0 -0
  74. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/landing.tsx +0 -0
  75. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/models-details.tsx +0 -0
  76. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/models.tsx +0 -0
  77. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/result-chart.tsx +0 -0
  78. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/result.tsx +0 -0
  79. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/spinner.tsx +0 -0
  80. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/accordion.tsx +0 -0
  81. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/button.tsx +0 -0
  82. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/card.tsx +0 -0
  83. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/chart.tsx +0 -0
  84. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/command.tsx +0 -0
  85. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/dialog.tsx +0 -0
  86. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/dropdown-menu.tsx +0 -0
  87. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/file-upload.tsx +0 -0
  88. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/form.tsx +0 -0
  89. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/input.tsx +0 -0
  90. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/label.tsx +0 -0
  91. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/navigation-menu.tsx +0 -0
  92. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/popover.tsx +0 -0
  93. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/select.tsx +0 -0
  94. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/separator.tsx +0 -0
  95. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/slider.tsx +0 -0
  96. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/switch.tsx +0 -0
  97. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/table.tsx +0 -0
  98. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/components/ui/tabs.tsx +0 -0
  99. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/index.css +0 -0
  100. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/lib/utils.ts +0 -0
  101. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/main.tsx +0 -0
  102. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/types.tsx +0 -0
  103. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/utils.tsx +0 -0
  104. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/src/vite-env.d.ts +0 -0
  105. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/tsconfig.app.json +0 -0
  106. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/tsconfig.json +0 -0
  107. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/tsconfig.node.json +0 -0
  108. {xspect-0.5.0 → xspect-0.5.1}/src/xspect/xspect-web/vite.config.ts +0 -0
  109. {xspect-0.5.0 → xspect-0.5.1}/tests/__init__.py +0 -0
  110. {xspect-0.5.0 → xspect-0.5.1}/tests/conftest.py +0 -0
  111. {xspect-0.5.0 → xspect-0.5.1}/tests/test_file_io.py +0 -0
  112. {xspect-0.5.0 → xspect-0.5.1}/tests/test_model_management.py +0 -0
  113. {xspect-0.5.0 → xspect-0.5.1}/tests/test_model_result.py +0 -0
  114. {xspect-0.5.0 → xspect-0.5.1}/tests/test_probabilisitc_filter_mlst_model.py +0 -0
  115. {xspect-0.5.0 → xspect-0.5.1}/tests/test_probabilistic_filter_model.py +0 -0
  116. {xspect-0.5.0 → xspect-0.5.1}/tests/test_probabilistic_filter_svm_model.py +0 -0
  117. {xspect-0.5.0 → xspect-0.5.1}/tests/test_probabilistic_single_filter_model.py +0 -0
  118. {xspect-0.5.0 → xspect-0.5.1}/tests/test_pub_mlst_handler.py +0 -0
  119. {xspect-0.5.0 → xspect-0.5.1}/tests/test_web.py +0 -0
@@ -28,6 +28,8 @@ jobs:
28
28
  run: |
29
29
  xspect models download
30
30
  - name: Test with pytest
31
+ env:
32
+ NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }}
31
33
  run: |
32
34
  pytest --cov --retries 2 --retry-delay 5
33
35
  - name: Upload coverage reports to Codecov
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: XspecT
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Tool to monitor and characterize pathogens using Bloom filters.
5
5
  License: MIT License
6
6
 
@@ -55,7 +55,7 @@ Requires-Dist: pytest-retry; extra == "test"
55
55
  Requires-Dist: httpx; extra == "test"
56
56
  Dynamic: license-file
57
57
 
58
- # XspecT - Acinetobacter Species Assignment Tool
58
+ # XspecT
59
59
  <!-- start intro -->
60
60
  ![Test](https://github.com/bionf/xspect2/actions/workflows/test.yml/badge.svg)
61
61
  [![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
@@ -63,7 +63,7 @@ Dynamic: license-file
63
63
 
64
64
  XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
65
65
 
66
- XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
66
+ XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a kmer index. Probabilistic data structures ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
67
67
 
68
68
  The tool is available as a web-based application and as a command line interface.
69
69
 
@@ -91,16 +91,22 @@ xspect models train ncbi
91
91
  ```
92
92
 
93
93
  ### How to run the web app
94
- To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
94
+ To run the web app, simply execute:
95
95
  ```
96
96
  xspect web
97
97
  ```
98
98
 
99
+ This will start a local web server. You can access the web app by navigating to `http://localhost:8000` in your web browser.
100
+
99
101
  ### How to use the XspecT command line interface
100
- Run XspecT with the configuration you want to run it with as arguments.
102
+ To use the XspecT command line interface, execute `xspect` with the desired subcommand and parameters.
103
+
104
+ **Example**:
101
105
  ```
102
106
  xspect classify species
103
107
  ```
108
+
109
+ If you do not provide the required parameters, the command line interface will prompt you for them.
104
110
  For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
105
111
  ```
106
112
  xspect --help
@@ -1,4 +1,4 @@
1
- # XspecT - Acinetobacter Species Assignment Tool
1
+ # XspecT
2
2
  <!-- start intro -->
3
3
  ![Test](https://github.com/bionf/xspect2/actions/workflows/test.yml/badge.svg)
4
4
  [![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
@@ -6,7 +6,7 @@
6
6
 
7
7
  XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
8
8
 
9
- XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
9
+ XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a kmer index. Probabilistic data structures ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
10
10
 
11
11
  The tool is available as a web-based application and as a command line interface.
12
12
 
@@ -34,16 +34,22 @@ xspect models train ncbi
34
34
  ```
35
35
 
36
36
  ### How to run the web app
37
- To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
37
+ To run the web app, simply execute:
38
38
  ```
39
39
  xspect web
40
40
  ```
41
41
 
42
+ This will start a local web server. You can access the web app by navigating to `http://localhost:8000` in your web browser.
43
+
42
44
  ### How to use the XspecT command line interface
43
- Run XspecT with the configuration you want to run it with as arguments.
45
+ To use the XspecT command line interface, execute `xspect` with the desired subcommand and parameters.
46
+
47
+ **Example**:
44
48
  ```
45
49
  xspect classify species
46
50
  ```
51
+
52
+ If you do not provide the required parameters, the command line interface will prompt you for them.
47
53
  For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
48
54
  ```
49
55
  xspect --help
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "XspecT"
3
- version = "0.5.0"
3
+ version = "0.5.1"
4
4
  description = "Tool to monitor and characterize pathogens using Bloom filters."
5
5
  readme = {file = "README.md", content-type = "text/markdown"}
6
6
  license = {file = "LICENSE"}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: XspecT
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Tool to monitor and characterize pathogens using Bloom filters.
5
5
  License: MIT License
6
6
 
@@ -55,7 +55,7 @@ Requires-Dist: pytest-retry; extra == "test"
55
55
  Requires-Dist: httpx; extra == "test"
56
56
  Dynamic: license-file
57
57
 
58
- # XspecT - Acinetobacter Species Assignment Tool
58
+ # XspecT
59
59
  <!-- start intro -->
60
60
  ![Test](https://github.com/bionf/xspect2/actions/workflows/test.yml/badge.svg)
61
61
  [![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
@@ -63,7 +63,7 @@ Dynamic: license-file
63
63
 
64
64
  XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
65
65
 
66
- XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
66
+ XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a kmer index. Probabilistic data structures ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
67
67
 
68
68
  The tool is available as a web-based application and as a command line interface.
69
69
 
@@ -91,16 +91,22 @@ xspect models train ncbi
91
91
  ```
92
92
 
93
93
  ### How to run the web app
94
- To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
94
+ To run the web app, simply execute:
95
95
  ```
96
96
  xspect web
97
97
  ```
98
98
 
99
+ This will start a local web server. You can access the web app by navigating to `http://localhost:8000` in your web browser.
100
+
99
101
  ### How to use the XspecT command line interface
100
- Run XspecT with the configuration you want to run it with as arguments.
102
+ To use the XspecT command line interface, execute `xspect` with the desired subcommand and parameters.
103
+
104
+ **Example**:
101
105
  ```
102
106
  xspect classify species
103
107
  ```
108
+
109
+ If you do not provide the required parameters, the command line interface will prompt you for them.
104
110
  For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
105
111
  ```
106
112
  xspect --help
@@ -0,0 +1,67 @@
1
+ from pathlib import Path
2
+ from xspect.mlst_feature.mlst_helper import pick_scheme_from_models_dir
3
+ import xspect.model_management as mm
4
+ from xspect.models.probabilistic_filter_mlst_model import (
5
+ ProbabilisticFilterMlstSchemeModel,
6
+ )
7
+ from xspect.definitions import fasta_endings, fastq_endings
8
+
9
+
10
+ def classify_genus(
11
+ model_genus: str, input_path: Path, output_path: Path, step: int = 1
12
+ ):
13
+ """Classify the input file using the genus model."""
14
+ model = mm.get_genus_model(model_genus)
15
+
16
+ input_paths = []
17
+ input_is_dir = input_path.is_dir()
18
+ ending_wildcards = [f"*.{ending}" for ending in fasta_endings + fastq_endings]
19
+
20
+ if input_is_dir:
21
+ input_paths = [p for e in ending_wildcards for p in input_path.glob(e)]
22
+ elif input_path.is_file():
23
+ input_paths = [input_path]
24
+
25
+ for idx, current_path in enumerate(input_paths):
26
+ result = model.predict(current_path, step=step)
27
+ result.input_source = current_path.name
28
+ output_name = (
29
+ f"{output_path.stem}_{idx+1}{output_path.suffix}"
30
+ if input_is_dir
31
+ else output_path.name
32
+ )
33
+ result.save(output_path.parent / output_name)
34
+ print(f"Saved result as {output_name}")
35
+
36
+
37
+ def classify_species(model_genus, input_path, output_path, step=1):
38
+ """Classify the input file using the species model."""
39
+ model = mm.get_species_model(model_genus)
40
+
41
+ input_paths = []
42
+ input_is_dir = input_path.is_dir()
43
+ ending_wildcards = [f"*.{ending}" for ending in fasta_endings + fastq_endings]
44
+
45
+ if input_is_dir:
46
+ input_paths = [p for e in ending_wildcards for p in input_path.glob(e)]
47
+ elif input_path.is_file():
48
+ input_paths = [input_path]
49
+
50
+ for idx, current_path in enumerate(input_paths):
51
+ result = model.predict(current_path, step=step)
52
+ result.input_source = current_path.name
53
+ output_name = (
54
+ f"{output_path.stem}_{idx+1}{output_path.suffix}"
55
+ if input_is_dir
56
+ else output_path.name
57
+ )
58
+ result.save(output_path.parent / output_name)
59
+ print(f"Saved result as {output_name}")
60
+
61
+
62
+ def classify_mlst(input_path, output_path):
63
+ """Classify the input file using the MLST model."""
64
+ scheme_path = pick_scheme_from_models_dir()
65
+ model = ProbabilisticFilterMlstSchemeModel.load(scheme_path)
66
+ result = model.predict(scheme_path, input_path)
67
+ result.save(output_path)
@@ -9,9 +9,17 @@ fastq_endings = ["fastq", "fq"]
9
9
 
10
10
  def get_xspect_root_path():
11
11
  """Return the root path for XspecT data."""
12
- root_path = Path(getcwd()) / "xspect-data"
13
- root_path.mkdir(exist_ok=True, parents=True)
14
- return root_path
12
+
13
+ home_based_dir = Path.home() / "xspect-data"
14
+ if home_based_dir.exists():
15
+ return home_based_dir
16
+
17
+ cwd_based_dir = Path(getcwd()) / "xspect-data"
18
+ if cwd_based_dir.exists():
19
+ return cwd_based_dir
20
+
21
+ home_based_dir.mkdir(exist_ok=True, parents=True)
22
+ return home_based_dir
15
23
 
16
24
 
17
25
  def get_xspect_model_path():
@@ -0,0 +1,138 @@
1
+ from pathlib import Path
2
+ from xspect.model_management import get_genus_model, get_species_model
3
+ from xspect.file_io import filter_sequences
4
+ from xspect.definitions import fasta_endings, fastq_endings
5
+
6
+
7
+ def filter_species(
8
+ model_genus: str,
9
+ model_species: str,
10
+ input_path: Path,
11
+ output_path: Path,
12
+ threshold: float,
13
+ classification_output_path: Path | None = None,
14
+ ):
15
+ """Filter sequences by species.
16
+ This function filters sequences from the input file based on the species model.
17
+ It loads the species model registered for the given genus, runs it on the input
18
+ sequences and keeps those that pass the filter for the requested species.
19
+
20
+ Args:
21
+ model_genus (str): The genus model slug.
22
+ model_species (str): The species model slug.
23
+ input_path (Path): The path to the input file containing sequences.
24
+ output_path (Path): The path to the output file where filtered sequences will be saved.
25
+ If the input is a directory, one file per input file is written next to this
26
+ path, named from its stem, a 1-based index and its suffix; otherwise the
27
+ given file name is used as is.
28
+ classification_output_path (Path): Optional path to save the classification results.
29
+ threshold (float): The threshold for filtering sequences. Only sequences with a score
30
+ above this threshold will be included in the output file. A threshold of -1 will
31
+ include only sequences if the species score is the highest among the
32
+ available species scores.
33
+ """
34
+ species_model = get_species_model(model_genus)
35
+
36
+ input_paths = []
37
+ input_is_dir = input_path.is_dir()
38
+ ending_wildcards = [f"*.{ending}" for ending in fasta_endings + fastq_endings]
39
+
40
+ if input_is_dir:
41
+ input_paths = [p for e in ending_wildcards for p in input_path.glob(e)]
42
+ elif input_path.is_file():
43
+ input_paths = [input_path]
44
+
45
+ for idx, current_path in enumerate(input_paths):
46
+ result = species_model.predict(current_path)
47
+ result.input_source = current_path.name
48
+
49
+ if classification_output_path:
50
+ classification_output_name = (
51
+ f"{classification_output_path.stem}_{idx+1}{classification_output_path.suffix}"
52
+ if input_is_dir
53
+ else classification_output_path.name
54
+ )
55
+ result.save(classification_output_path.parent / classification_output_name)
56
+ print(
57
+ f"Saved classification results from {current_path.name} as {classification_output_name}"
58
+ )
59
+
60
+ included_ids = result.get_filtered_subsequence_labels(model_species, threshold)
61
+ if not included_ids:
62
+ print(f"No sequences found for the given species in {current_path.name}.")
63
+ continue
64
+ output_name = (
65
+ f"{output_path.stem}_{idx+1}{output_path.suffix}"
66
+ if input_is_dir
67
+ else output_path.name
68
+ )
69
+ filter_sequences(
70
+ current_path,
71
+ output_path.parent / output_name,
72
+ included_ids,
73
+ )
74
+ print(f"Saved filtered sequences from {current_path.name} as {output_name}")
75
+
76
+
77
+ def filter_genus(
78
+ model_genus: str,
79
+ input_path: Path,
80
+ output_path: Path,
81
+ threshold: float,
82
+ classification_output_path: Path | None = None,
83
+ ):
84
+ """Filter sequences by genus.
85
+ This function filters sequences from the input file based on the genus model.
86
+ It uses the genus model to identify the genus of the sequences and then applies
87
+ the filtering based on the provided threshold.
88
+
89
+ Args:
90
+ model_genus (str): The genus model slug.
91
+ input_path (Path): The path to the input file containing sequences.
92
+ output_path (Path): The path to the output file where filtered sequences will be saved.
93
+ threshold (float): The threshold for filtering sequences. Only sequences with a score
94
+ above this threshold will be included in the output file.
95
+ classification_output_path (Path): Optional path to save the classification results.
96
+
97
+ """
98
+ genus_model = get_genus_model(model_genus)
99
+
100
+ input_paths = []
101
+ input_is_dir = input_path.is_dir()
102
+ ending_wildcards = [f"*.{ending}" for ending in fasta_endings + fastq_endings]
103
+
104
+ if input_is_dir:
105
+ input_paths = [p for e in ending_wildcards for p in input_path.glob(e)]
106
+ elif input_path.is_file():
107
+ input_paths = [input_path]
108
+
109
+ for idx, current_path in enumerate(input_paths):
110
+ result = genus_model.predict(current_path)
111
+ result.input_source = current_path.name
112
+
113
+ if classification_output_path:
114
+ classification_output_name = (
115
+ f"{classification_output_path.stem}_{idx+1}{classification_output_path.suffix}"
116
+ if input_is_dir
117
+ else classification_output_path.name
118
+ )
119
+ result.save(classification_output_path.parent / classification_output_name)
120
+ print(
121
+ f"Saved classification results from {current_path.name} as {classification_output_name}"
122
+ )
123
+
124
+ included_ids = result.get_filtered_subsequence_labels(model_genus, threshold)
125
+ if not included_ids:
126
+ print(f"No sequences found for the given genus in {current_path.name}.")
127
+ continue
128
+ output_name = (
129
+ f"{output_path.stem}_{idx+1}{output_path.suffix}"
130
+ if input_is_dir
131
+ else output_path.name
132
+ )
133
+ filter_sequences(
134
+ current_path,
135
+ output_path.parent / output_name,
136
+ included_ids,
137
+ )
138
+ print(f"Saved filtered sequences from {current_path.name} as {output_name}")
@@ -7,12 +7,12 @@ import uvicorn
7
7
  from xspect import classify
8
8
  from xspect.web import app
9
9
  from xspect.download_models import download_test_models
10
- from xspect.file_io import filter_sequences
10
+ from xspect import filter_sequences
11
11
  from xspect.train import train_from_directory, train_from_ncbi
12
12
  from xspect.definitions import (
13
13
  get_xspect_model_path,
14
14
  )
15
- from xspect.mlst_feature.mlst_helper import pick_scheme, pick_scheme_from_models_dir
15
+ from xspect.mlst_feature.mlst_helper import pick_scheme
16
16
  from xspect.mlst_feature.pub_mlst_handler import PubMLSTHandler
17
17
  from xspect.models.probabilistic_filter_mlst_model import (
18
18
  ProbabilisticFilterMlstSchemeModel,
@@ -211,19 +211,19 @@ def classify_seqs():
211
211
  help="Path to FASTA or FASTQ file for classification.",
212
212
  type=click.Path(exists=True, dir_okay=True, file_okay=True),
213
213
  prompt=True,
214
+ default=Path("."),
214
215
  )
215
216
  @click.option(
216
217
  "-o",
217
218
  "--output-path",
218
219
  help="Path to the output file.",
219
- type=click.Path(dir_okay=True, file_okay=True),
220
+ type=click.Path(dir_okay=False, file_okay=True),
220
221
  default=Path(".") / f"result_{uuid4()}.json",
221
222
  )
222
223
  def classify_genus(model_genus, input_path, output_path):
223
224
  """Classify samples using a genus model."""
224
225
  click.echo("Classifying...")
225
226
  classify.classify_genus(model_genus, Path(input_path), Path(output_path))
226
- click.echo(f"Result saved as {output_path}.")
227
227
 
228
228
 
229
229
  @classify_seqs.command(
@@ -244,12 +244,13 @@ def classify_genus(model_genus, input_path, output_path):
244
244
  help="Path to FASTA or FASTQ file for classification.",
245
245
  type=click.Path(exists=True, dir_okay=True, file_okay=True),
246
246
  prompt=True,
247
+ default=Path("."),
247
248
  )
248
249
  @click.option(
249
250
  "-o",
250
251
  "--output-path",
251
252
  help="Path to the output file.",
252
- type=click.Path(dir_okay=True, file_okay=True),
253
+ type=click.Path(dir_okay=False, file_okay=True),
253
254
  default=Path(".") / f"result_{uuid4()}.json",
254
255
  )
255
256
  @click.option(
@@ -264,7 +265,6 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
264
265
  classify.classify_species(
265
266
  model_genus, Path(input_path), Path(output_path), sparse_sampling_step
266
267
  )
267
- click.echo(f"Result saved as {output_path}.")
268
268
 
269
269
 
270
270
  @classify_seqs.command(
@@ -275,15 +275,14 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
275
275
  "-i",
276
276
  "--input-path",
277
277
  help="Path to FASTA-file for mlst identification.",
278
- type=click.Path(exists=True, dir_okay=True, file_okay=True),
278
+ type=click.Path(exists=True, dir_okay=False, file_okay=True),
279
279
  prompt=True,
280
280
  )
281
281
  @click.option(
282
282
  "-o",
283
283
  "--output-path",
284
284
  help="Path to the output file.",
285
- type=click.Path(dir_okay=True, file_okay=True),
286
- default=Path(".") / f"result_{uuid4()}.json",
285
+ type=click.Path(dir_okay=False, file_okay=True),
287
286
  )
288
287
  def classify_mlst(input_path, output_path):
289
288
  """MLST classify a sample."""
@@ -321,37 +320,42 @@ def filter_seqs():
321
320
  help="Path to FASTA or FASTQ file for classification.",
322
321
  type=click.Path(exists=True, dir_okay=True, file_okay=True),
323
322
  prompt=True,
323
+ default=Path("."),
324
324
  )
325
325
  @click.option(
326
326
  "-o",
327
327
  "--output-path",
328
328
  help="Path to the output file.",
329
- type=click.Path(dir_okay=True, file_okay=True),
329
+ type=click.Path(dir_okay=False, file_okay=True),
330
330
  prompt=True,
331
+ default=Path(".") / f"genus_filtered_{uuid4()}.fasta",
331
332
  )
332
333
  @click.option(
334
+ "--classification-output-path",
335
+ help="Optional path to the classification output file.",
336
+ type=click.Path(dir_okay=False, file_okay=True),
337
+ )
338
+ @click.option(
339
+ "-t",
333
340
  "--threshold",
334
- type=float,
341
+ type=click.FloatRange(0, 1),
335
342
  help="Threshold for filtering (default: 0.7).",
336
343
  default=0.7,
337
344
  prompt=True,
338
345
  )
339
- def filter_genus(model_genus, input_path, output_path, threshold):
346
+ def filter_genus(
347
+ model_genus, input_path, output_path, classification_output_path, threshold
348
+ ):
340
349
  """Filter samples using a genus model."""
341
350
  click.echo("Filtering...")
342
- genus_model = get_genus_model(model_genus)
343
- result = genus_model.predict(Path(input_path))
344
- included_ids = result.get_filtered_subsequence_labels(model_genus, threshold)
345
- if not included_ids:
346
- click.echo("No sequences found for the given genus.")
347
- return
348
351
 
349
- filter_sequences(
352
+ filter_sequences.filter_genus(
353
+ model_genus,
350
354
  Path(input_path),
351
355
  Path(output_path),
352
- included_ids=included_ids,
356
+ threshold,
357
+ Path(classification_output_path) if classification_output_path else None,
353
358
  )
354
- click.echo(f"Filtered sequences saved at {output_path}.")
355
359
 
356
360
 
357
361
  @filter_seqs.command(
@@ -378,24 +382,44 @@ def filter_genus(model_genus, input_path, output_path, threshold):
378
382
  help="Path to FASTA or FASTQ file for classification.",
379
383
  type=click.Path(exists=True, dir_okay=True, file_okay=True),
380
384
  prompt=True,
385
+ default=Path("."),
381
386
  )
382
387
  @click.option(
383
388
  "-o",
384
389
  "--output-path",
385
390
  help="Path to the output file.",
386
- type=click.Path(dir_okay=True, file_okay=True),
391
+ type=click.Path(dir_okay=False, file_okay=True),
387
392
  prompt=True,
393
+ default=Path(".") / f"species_filtered_{uuid4()}.fasta",
394
+ )
395
+ @click.option(
396
+ "--classification-output-path",
397
+ help="Optional path to the classification output file.",
398
+ type=click.Path(dir_okay=False, file_okay=True),
388
399
  )
389
400
  @click.option(
401
+ "-t",
390
402
  "--threshold",
391
403
  type=float,
392
404
  help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring species.",
393
405
  default=0.7,
394
406
  prompt=True,
395
407
  )
396
- def filter_species(model_genus, model_species, input_path, output_path, threshold):
408
+ def filter_species(
409
+ model_genus,
410
+ model_species,
411
+ input_path,
412
+ output_path,
413
+ threshold,
414
+ classification_output_path,
415
+ ):
397
416
  """Filter a sample using the species model."""
398
417
 
418
+ if threshold != -1 and (threshold < 0 or threshold > 1):
419
+ raise click.BadParameter(
420
+ "Threshold must be between 0 and 1, or -1 for filtering by the highest scoring species."
421
+ )
422
+
399
423
  available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
400
424
  available_species = {
401
425
  id: name.replace(f"{model_genus} ", "")
@@ -420,18 +444,14 @@ def filter_species(model_genus, model_species, input_path, output_path, threshol
420
444
  ][0]
421
445
 
422
446
  click.echo("Filtering...")
423
- species_model = get_species_model(model_genus)
424
- result = species_model.predict(Path(input_path))
425
- included_ids = result.get_filtered_subsequence_labels(model_species, threshold)
426
- if not included_ids:
427
- click.echo("No sequences found for the given species.")
428
- return
429
- filter_sequences(
447
+ filter_sequences.filter_species(
448
+ model_genus,
449
+ model_species,
430
450
  Path(input_path),
431
451
  Path(output_path),
432
- included_ids=included_ids,
452
+ threshold,
453
+ Path(classification_output_path) if classification_output_path else None,
433
454
  )
434
- click.echo(f"Filtered sequences saved at {output_path}.")
435
455
 
436
456
 
437
457
  if __name__ == "__main__":
@@ -20,13 +20,13 @@ class ProbabilisticFilterModel:
20
20
  self,
21
21
  k: int,
22
22
  model_display_name: str,
23
- author: str,
24
- author_email: str,
23
+ author: str | None,
24
+ author_email: str | None,
25
25
  model_type: str,
26
26
  base_path: Path,
27
27
  fpr: float = 0.01,
28
28
  num_hashes: int = 7,
29
- training_accessions: dict[str, list[str]] = None,
29
+ training_accessions: dict[str, list[str]] | None = None,
30
30
  ) -> None:
31
31
  if k < 1:
32
32
  raise ValueError("Invalid k value, must be greater than 0")
@@ -49,7 +49,7 @@ class ProbabilisticFilterModel:
49
49
  self.index = None
50
50
  self.training_accessions = training_accessions
51
51
 
52
- def get_cobs_index_path(self) -> Path:
52
+ def get_cobs_index_path(self) -> str:
53
53
  """Returns the path to the cobs index"""
54
54
  return str(self.base_path / self.slug() / "index.cobs_classic")
55
55
 
@@ -76,8 +76,8 @@ class ProbabilisticFilterModel:
76
76
  def fit(
77
77
  self,
78
78
  dir_path: Path,
79
- display_names: dict = None,
80
- training_accessions: dict[str, list[str]] = None,
79
+ display_names: dict | None = None,
80
+ training_accessions: dict[str, list[str]] | None = None,
81
81
  ) -> None:
82
82
  """Adds filters to the model"""
83
83
 
@@ -123,7 +123,7 @@ class ProbabilisticFilterModel:
123
123
  self.index = cobs.Search(self.get_cobs_index_path(), True)
124
124
 
125
125
  def calculate_hits(
126
- self, sequence: Seq, filter_ids: list[str] = None, step: int = 1
126
+ self, sequence: Seq, filter_ids: list[str] | None = None, step: int = 1
127
127
  ) -> dict:
128
128
  """Calculates the hits for a sequence"""
129
129
 
@@ -21,16 +21,16 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
21
21
  self,
22
22
  k: int,
23
23
  model_display_name: str,
24
- author: str,
25
- author_email: str,
24
+ author: str | None,
25
+ author_email: str | None,
26
26
  model_type: str,
27
27
  base_path: Path,
28
28
  kernel: str,
29
29
  c: float,
30
30
  fpr: float = 0.01,
31
31
  num_hashes: int = 7,
32
- training_accessions: dict[str, list[str]] = None,
33
- svm_accessions: dict[str, list[str]] = None,
32
+ training_accessions: dict[str, list[str]] | None = None,
33
+ svm_accessions: dict[str, list[str]] | None = None,
34
34
  ) -> None:
35
35
  super().__init__(
36
36
  k=k,
@@ -64,10 +64,10 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
64
64
  self,
65
65
  dir_path: Path,
66
66
  svm_path: Path,
67
- display_names: dict = None,
67
+ display_names: dict[str, str] | None = None,
68
68
  svm_step: int = 1,
69
- training_accessions: list[str] = None,
70
- svm_accessions: list[str] = None,
69
+ training_accessions: dict[str, list[str]] | None = None,
70
+ svm_accessions: dict[str, list[str]] | None = None,
71
71
  ) -> None:
72
72
  """Fit the SVM to the sequences and labels"""
73
73