moose-fs 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. moose_fs-0.1.0/.github/workflows/doc.yml +71 -0
  2. moose_fs-0.1.0/.github/workflows/publish.yml +53 -0
  3. moose_fs-0.1.0/.github/workflows/tests.yml +64 -0
  4. moose_fs-0.1.0/.gitignore +48 -0
  5. moose_fs-0.1.0/.pre-commit-config.yaml +6 -0
  6. moose_fs-0.1.0/LICENSE +21 -0
  7. moose_fs-0.1.0/PKG-INFO +232 -0
  8. moose_fs-0.1.0/README.md +190 -0
  9. moose_fs-0.1.0/docs/Makefile +20 -0
  10. moose_fs-0.1.0/docs/make.bat +36 -0
  11. moose_fs-0.1.0/docs/source/api_reference.rst +13 -0
  12. moose_fs-0.1.0/docs/source/conf.py +26 -0
  13. moose_fs-0.1.0/docs/source/core.rst +23 -0
  14. moose_fs-0.1.0/docs/source/feature_selection_pipeline.rst +8 -0
  15. moose_fs-0.1.0/docs/source/feature_selectors.rst +58 -0
  16. moose_fs-0.1.0/docs/source/index.rst +15 -0
  17. moose_fs-0.1.0/docs/source/installation.rst +27 -0
  18. moose_fs-0.1.0/docs/source/introduction.rst +22 -0
  19. moose_fs-0.1.0/docs/source/mergers.rst +33 -0
  20. moose_fs-0.1.0/docs/source/metrics.rst +13 -0
  21. moose_fs-0.1.0/docs/source/usage.rst +54 -0
  22. moose_fs-0.1.0/moosefs/__init__.py +6 -0
  23. moose_fs-0.1.0/moosefs/core/__init__.py +6 -0
  24. moose_fs-0.1.0/moosefs/core/data_processor.py +319 -0
  25. moose_fs-0.1.0/moosefs/core/feature.py +44 -0
  26. moose_fs-0.1.0/moosefs/core/novovicova.py +60 -0
  27. moose_fs-0.1.0/moosefs/core/pareto.py +90 -0
  28. moose_fs-0.1.0/moosefs/feature_selection_pipeline.py +548 -0
  29. moose_fs-0.1.0/moosefs/feature_selectors/__init__.py +26 -0
  30. moose_fs-0.1.0/moosefs/feature_selectors/base_selector.py +38 -0
  31. moose_fs-0.1.0/moosefs/feature_selectors/default_variance.py +21 -0
  32. moose_fs-0.1.0/moosefs/feature_selectors/elastic_net_selector.py +75 -0
  33. moose_fs-0.1.0/moosefs/feature_selectors/f_statistic_selector.py +42 -0
  34. moose_fs-0.1.0/moosefs/feature_selectors/lasso_selector.py +46 -0
  35. moose_fs-0.1.0/moosefs/feature_selectors/mrmr_selector.py +57 -0
  36. moose_fs-0.1.0/moosefs/feature_selectors/mutual_info_selector.py +45 -0
  37. moose_fs-0.1.0/moosefs/feature_selectors/random_forest_selector.py +48 -0
  38. moose_fs-0.1.0/moosefs/feature_selectors/svm_selector.py +50 -0
  39. moose_fs-0.1.0/moosefs/feature_selectors/variance_selectors.py +16 -0
  40. moose_fs-0.1.0/moosefs/feature_selectors/xgboost_selector.py +44 -0
  41. moose_fs-0.1.0/moosefs/merging_strategies/__init__.py +17 -0
  42. moose_fs-0.1.0/moosefs/merging_strategies/arithmetic_mean_merger.py +46 -0
  43. moose_fs-0.1.0/moosefs/merging_strategies/base_merger.py +64 -0
  44. moose_fs-0.1.0/moosefs/merging_strategies/borda_merger.py +46 -0
  45. moose_fs-0.1.0/moosefs/merging_strategies/consensus_merger.py +80 -0
  46. moose_fs-0.1.0/moosefs/merging_strategies/l2_norm_merger.py +42 -0
  47. moose_fs-0.1.0/moosefs/merging_strategies/union_of_intersections_merger.py +89 -0
  48. moose_fs-0.1.0/moosefs/metrics/__init__.py +23 -0
  49. moose_fs-0.1.0/moosefs/metrics/performance_metrics.py +239 -0
  50. moose_fs-0.1.0/moosefs/metrics/stability_metrics.py +49 -0
  51. moose_fs-0.1.0/moosefs/utils.py +161 -0
  52. moose_fs-0.1.0/pyproject.toml +114 -0
  53. moose_fs-0.1.0/scripts/config.yml +92 -0
  54. moose_fs-0.1.0/scripts/main.py +163 -0
  55. moose_fs-0.1.0/scripts/utils.py +186 -0
  56. moose_fs-0.1.0/tests/__init__.py +0 -0
  57. moose_fs-0.1.0/tests/config_test.yml +34 -0
  58. moose_fs-0.1.0/tests/test_base_merger_validation.py +16 -0
  59. moose_fs-0.1.0/tests/test_consensus_merger.py +33 -0
  60. moose_fs-0.1.0/tests/test_data_processor.py +68 -0
  61. moose_fs-0.1.0/tests/test_feature.py +32 -0
  62. moose_fs-0.1.0/tests/test_fs_methods.py +181 -0
  63. moose_fs-0.1.0/tests/test_main_script.py +75 -0
  64. moose_fs-0.1.0/tests/test_merging.py +346 -0
  65. moose_fs-0.1.0/tests/test_metrics.py +86 -0
  66. moose_fs-0.1.0/tests/test_novovicova.py +74 -0
  67. moose_fs-0.1.0/tests/test_pareto.py +107 -0
  68. moose_fs-0.1.0/tests/test_pipeline_internals.py +91 -0
  69. moose_fs-0.1.0/tests/test_pipeline_method.py +81 -0
  70. moose_fs-0.1.0/tests/test_pipeline_process.py +143 -0
  71. moose_fs-0.1.0/tests/test_reproducibility.py +68 -0
  72. moose_fs-0.1.0/tests/test_stability_metrics.py +28 -0
  73. moose_fs-0.1.0/tests/test_utils.py +34 -0
  74. moose_fs-0.1.0/tests/test_variance_default.py +20 -0
  75. moose_fs-0.1.0/tutorials/advanced_example.ipynb +256 -0
  76. moose_fs-0.1.0/tutorials/getting_started.ipynb +231 -0
  77. moose_fs-0.1.0/tutorials/simple_example.ipynb +213 -0
@@ -0,0 +1,71 @@
1
+ name: Documentation
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ concurrency:
12
+ group: docs-${{ github.workflow }}-${{ github.ref }}
13
+ cancel-in-progress: true
14
+
15
+ permissions:
16
+ contents: write
17
+ pages: write
18
+ id-token: write
19
+
20
+ jobs:
21
+ build_and_deploy:
22
+ name: Build Docs
23
+ runs-on: ubuntu-latest
24
+
25
+ steps:
26
+ - name: Checkout code
27
+ uses: actions/checkout@v4
28
+
29
+ - name: Set up uv and Python
30
+ uses: astral-sh/setup-uv@v3
31
+ with:
32
+ python-version: '3.11'
33
+
34
+ - name: Create project venv
35
+ run: uv venv --python 3.11
36
+
37
+ - name: Install docs dependencies
38
+ run: |
39
+ uv pip install -e . -e .[docs]
40
+ uv pip install build
41
+
42
+ - name: Ensure root directory is included in PYTHONPATH
43
+ run: |
44
+ export PYTHONPATH=$PYTHONPATH:$(pwd)
45
+ echo $PYTHONPATH
46
+
47
+ - name: Build package
48
+ run: uv run python -m build
49
+
50
+ - name: Clean build directory
51
+ run: |
52
+ rm -rf docs/build/* || true
53
+
54
+ - name: Build docs (sphinx)
55
+ if: ${{ hashFiles('docs/source/conf.py') != '' }}
56
+ run: |
57
+ export PYTHONPATH=$(pwd)
58
+ echo "Building docs with PYTHONPATH: $PYTHONPATH"
59
+ uv run sphinx-build -b html docs/source docs/build/html -v
60
+
61
+ - name: Deploy to GitHub Pages
62
+ if: ${{ hashFiles('docs/source/conf.py') != '' && github.event_name == 'push' }}
63
+ uses: peaceiris/actions-gh-pages@v3
64
+ with:
65
+ github_token: ${{ secrets.GITHUB_TOKEN }}
66
+ publish_dir: ./docs/build/html
67
+
68
+ - name: Verify Imports
69
+ run: |
70
+ uv run python -c "import moosefs.core.pareto; print('moosefs.core.pareto imported successfully')"
71
+ uv run python -c "import moosefs.feature_selection_pipeline; print('moosefs.feature_selection_pipeline imported successfully')"
@@ -0,0 +1,53 @@
1
+ # This workflow builds the distribution and uploads to PyPI.
2
+
3
+ name: Publish to PyPI
4
+
5
+ on:
6
+ release:
7
+ types: [published]
8
+
9
+ env:
10
+ PYTHON_VERSION: '3.11'
11
+
12
+ permissions:
13
+ contents: write
14
+
15
+ jobs:
16
+ build-and-publish:
17
+ runs-on: ubuntu-latest
18
+
19
+ steps:
20
+ - name: Checkout code
21
+ uses: actions/checkout@v4
22
+
23
+ - name: Set up uv and Python 🐍
24
+ uses: astral-sh/setup-uv@v3
25
+ with:
26
+ python-version: ${{ env.PYTHON_VERSION }}
27
+
28
+ - name: Create project venv
29
+ run: uv venv --python ${{ env.PYTHON_VERSION }}
30
+
31
+ - name: Install build tools
32
+ run: uv pip install -e . -e .[dev]
33
+
34
+ - name: Build binary wheel and source tarball 🔨
35
+ run: uv run python -m build
36
+
37
+ - name: Verify distribution
38
+ run: uv run twine check dist/*
39
+
40
+ - name: Show built artifacts
41
+ run: ls -l dist
42
+
43
+ - name: Upload built artifacts
44
+ uses: actions/upload-artifact@v4
45
+ with:
46
+ name: dist
47
+ path: dist/*
48
+ retention-days: 7
49
+
50
+ - name: Publish distribution 📦 to PyPI
51
+ uses: pypa/gh-action-pypi-publish@release/v1
52
+ with:
53
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,64 @@
1
+ name: tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ - '**'
8
+ pull_request:
9
+
10
+ concurrency:
11
+ group: tests-${{ github.workflow }}-${{ github.ref }}
12
+ cancel-in-progress: true
13
+
14
+ jobs:
15
+ test:
16
+ runs-on: ubuntu-latest
17
+ strategy:
18
+ fail-fast: false
19
+ matrix:
20
+ python-version: [ '3.9', '3.10', '3.11', '3.12' ]
21
+
22
+ steps:
23
+ - name: Checkout code
24
+ uses: actions/checkout@v4
25
+
26
+ - name: Set up uv and Python
27
+ uses: astral-sh/setup-uv@v3
28
+ with:
29
+ python-version: ${{ matrix.python-version }}
30
+
31
+ - name: Create project venv
32
+ run: uv venv --python ${{ matrix.python-version }}
33
+
34
+ - name: Install dev dependencies (editable)
35
+ run: uv pip install -e . -e .[dev]
36
+
37
+ - name: Verify imports
38
+ run: uv run python -c "import moosefs; print('import ok')"
39
+
40
+ - name: Ruff format check
41
+ run: uv run ruff format --check .
42
+
43
+ - name: Ruff lint
44
+ run: uv run ruff check --output-format=github .
45
+
46
+ - name: Run pytest
47
+ run: uv run pytest -q --maxfail=1 --disable-warnings
48
+
49
+ - name: Upload junit/coverage (if any)
50
+ if: always()
51
+ uses: actions/upload-artifact@v4
52
+ with:
53
+ name: test-artifacts-py${{ matrix.python-version }}
54
+ path: |
55
+ ./.pytest_cache/**
56
+ ./coverage.xml
57
+ if-no-files-found: ignore
58
+ retention-days: 7
59
+
60
+ - name: Build package
61
+ run: uv run python -m build
62
+
63
+ - name: Verify distribution
64
+ run: uv run twine check dist/*
@@ -0,0 +1,48 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ dist/
13
+ *.egg-info/
14
+ eggs/
15
+ lib/
16
+ lib64/
17
+ parts/
18
+ sdist/
19
+ var/
20
+ wheels/
21
+ pip-wheel-metadata/
22
+
23
+ # Virtual environments
24
+ .venv/
25
+ venv/
26
+ ENV/
27
+ env/
28
+
29
+ # Test, coverage, and cache
30
+ .pytest_cache/
31
+ .mypy_cache/
32
+ .ruff_cache/
33
+ .coverage
34
+ coverage.xml
35
+
36
+ # IDE/editor settings
37
+ .vscode/
38
+ .idea/
39
+ .history/
40
+
41
+ # OS files
42
+ .DS_Store
43
+
44
+ # Project-specific
45
+ results/
46
+ tests/test_experiment/
47
+ docs/build/
48
+ uv.lock
@@ -0,0 +1,6 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.14.2
4
+ hooks:
5
+ - id: ruff
6
+ - id: ruff-format
moose_fs-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 CI4CB-lab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.4
2
+ Name: moose-fs
3
+ Version: 0.1.0
4
+ Summary: MOOSE-FS: Multi-Objective Optimized Ensemble Feature Selection
5
+ Project-URL: Repository, https://github.com/CI4CB-lab/moosefs
6
+ Project-URL: Documentation, https://CI4CB-lab.github.io/moosefs/
7
+ Author-email: Arthur Babey <arthur.babey@heig-vd.ch>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
+ Requires-Python: >=3.9
15
+ Requires-Dist: joblib
16
+ Requires-Dist: mrmr-selection
17
+ Requires-Dist: numpy
18
+ Requires-Dist: pandas
19
+ Requires-Dist: pyyaml
20
+ Requires-Dist: ranky
21
+ Requires-Dist: scikit-learn>=1.5.0
22
+ Requires-Dist: scipy>=1.11
23
+ Requires-Dist: xgboost
24
+ Provides-Extra: dev
25
+ Requires-Dist: build>=1; extra == 'dev'
26
+ Requires-Dist: coverage[toml]; extra == 'dev'
27
+ Requires-Dist: pre-commit; extra == 'dev'
28
+ Requires-Dist: pytest>=7; extra == 'dev'
29
+ Requires-Dist: ruff>=0.14.2; extra == 'dev'
30
+ Requires-Dist: sphinx-autodoc-typehints; extra == 'dev'
31
+ Requires-Dist: sphinx-rtd-theme; extra == 'dev'
32
+ Requires-Dist: sphinx>=7; extra == 'dev'
33
+ Requires-Dist: twine>=5; extra == 'dev'
34
+ Provides-Extra: docs
35
+ Requires-Dist: sphinx-autodoc-typehints; extra == 'docs'
36
+ Requires-Dist: sphinx-rtd-theme; extra == 'docs'
37
+ Requires-Dist: sphinx>=7; extra == 'docs'
38
+ Provides-Extra: test
39
+ Requires-Dist: coverage[toml]; extra == 'test'
40
+ Requires-Dist: pytest>=7; extra == 'test'
41
+ Description-Content-Type: text/markdown
42
+
43
+ # MOOSE-FS
44
+
45
+ [![tests](https://github.com/CI4CB-lab/moosefs/actions/workflows/tests.yml/badge.svg)](https://github.com/CI4CB-lab/moosefs/actions/workflows/tests.yml)
46
+ [Documentation](https://CI4CB-lab.github.io/moosefs/)
47
+
48
+ ## Overview
49
+
50
+ MOOSE-FS is a feature selection library that leverages an ensemble-based approach to optimize both predictive performance and stability. By combining multiple feature selection methods, merging strategies, and evaluation metrics, it provides a highly flexible and tunable pipeline for both classification and regression tasks. The package automates feature selection across multiple iterations and uses Pareto optimization to identify the best feature subsets.
51
+
52
+ Users can define their feature selection process by:
53
+ - Selecting feature selection methods from predefined options or implementing custom ones.
54
+ - Choosing merging strategies to aggregate feature rankings.
55
+ - Specifying performance metrics to evaluate selected features.
56
+ - Configuring the number of features to select and the number of repetitions.
57
+ - Working with either **classification** or **regression** problems.
58
+
59
+ The library allows defining feature selectors, merging strategies, and metrics either as **class instances** or as **string identifiers**, which act as placeholders for built-in methods. The framework is modular and can be easily extended by adding new selection algorithms or merging strategies.
60
+
61
+ ---
62
+
63
+ ## Requirements
64
+
65
+ - **Python** 3.9 or higher
66
+ - **Dependencies**: Automatically installed from `pyproject.toml`.
67
+
68
+ ---
69
+
70
+ ## Installation
71
+
72
+ ### From Source
73
+
74
+ To install the package from source, run:
75
+
76
+ ```bash
77
+ pip install git+https://github.com/CI4CB-lab/moosefs.git
78
+ ```
79
+
80
+ Alternatively, clone the repository and install locally:
81
+
82
+ ```bash
83
+ git clone https://github.com/CI4CB-lab/moosefs.git
84
+ cd moosefs
85
+ pip install .
86
+ ```
87
+
88
+ ---
89
+
90
+ ## Using the Library
91
+
92
+ ### 1. Feature Selection Pipeline
93
+
94
+ The core of MOOSE-FS is the `FeatureSelectionPipeline`, which provides a fully configurable workflow for feature selection. Users can specify:
95
+ - Feature selection methods
96
+ - Merging strategy
97
+ - Evaluation metrics
98
+ - Task type (classification or regression)
99
+ - Number of features to select
100
+ - Number of repetitions
101
+
102
+ #### Example Usage
103
+
104
+ ```python
105
+ # `data` can be a single DataFrame (last column = target)
106
+ # or you can pass `X` and `y` separately.
107
+ # Assume `data` is a pandas DataFrame whose last column "label" holds the targets.
108
+ from moosefs import FeatureSelectionPipeline
109
+
110
+ fs_methods = ["f_statistic_selector", "random_forest_selector", "svm_selector"]
111
+ merging_strategy = "union_of_intersections_merger"
112
+
113
+ pipeline = FeatureSelectionPipeline(
114
+ X=data.drop(columns=["label"]),
115
+ y=data["label"],
116
+ fs_methods=fs_methods,
117
+ merging_strategy=merging_strategy,
118
+ num_repeats=5,
119
+ task="classification",
120
+ num_features_to_select=10,
121
+ )
122
+ results = pipeline.run()
123
+ ```
124
+
125
+ This runs feature selection, merges the results using the chosen strategy, and returns the best selected features.
126
+
127
+ ### 2. Extensibility
128
+
129
+ MOOSE-FS is designed to be easily extended. Users can implement custom:
130
+ - **Feature selection methods**: Define a new feature selector class and integrate it into the pipeline.
131
+ - **Merging strategies**: Implement a custom strategy to aggregate selected features.
132
+ - **Metrics**: Add new evaluation metrics tailored to specific tasks.
133
+
134
+ New methods can be used directly in the pipeline by passing the class or a corresponding identifier.
135
+
136
+ ---
137
+
138
+ ## Using the CLI
139
+
140
+ Once installed, the pipeline can also be run from the command line using:
141
+
142
+ ```bash
143
+ efs-pipeline
144
+ ```
145
+
146
+ This command executes `scripts/main.py` using parameters from `scripts/config.yml`. To use a different config file, pass its path as an argument:
147
+
148
+ ```bash
149
+ efs-pipeline path/to/your_config.yaml
150
+ ```
151
+
152
+ ### Example `config.yaml`
153
+
154
+ ```yaml
155
+ experiment:
156
+ name: "example_experiment"
157
+ results_path: "results/"
158
+ data_path: "data/input_data.csv"
159
+
160
+ preprocessing:
161
+ normalize: true
162
+ handle_missing: true
163
+
164
+ pipeline:
165
+ fs_methods: ["f_statistic_selector", "random_forest_selector"]
166
+ merging_strategy: "union_of_intersections_merger"
167
+ num_repeats: 5
168
+ task: "classification"
169
+ num_features_to_select: 10
170
+ ```
171
+
172
+ ### Results
173
+
174
+ The results are saved in a structured directory under `results/example_experiment/`, including:
175
+ - A **text file** summarizing the pipeline run.
176
+ - A **CSV file** containing the final results.
177
+
178
+ ---
179
+
180
+ ## Code Structure
181
+
182
+ - **`core/`**: Core modules for data processing, feature handling, Pareto analysis, and stability computation (performance and stability metrics live in `metrics/`).
183
+ - **`feature_selection_pipeline.py`**: Defines the main feature selection workflow.
184
+ - **`feature_selectors/`**: Implements feature selection methods (e.g., F-statistic, mutual information, RandomForest, SVM).
185
+ - **`merging_strategies/`**: Implements merging strategies such as Borda count and union of intersections.
186
+
187
+ ---
188
+
189
+ ## Contributing
190
+
191
+ Contributions are welcome! If you have ideas for improving MOOSE-FS, feel free to open an issue or submit a pull request.
192
+
193
+ ### Development (uv)
194
+
195
+ This project uses uv for local environments and dependency management. The library builds via the existing PEP 517 backend (hatchling); uv only manages the environment, installs, and command execution.
196
+
197
+ - Install/select Python 3.9+ and ensure `uv` is installed.
198
+ - Create a local virtual environment in `.venv`:
199
+
200
+ ```bash
201
+ uv venv --python 3.9
202
+ ```
203
+
204
+ - Install dev dependencies (editable):
205
+
206
+ ```bash
207
+ uv pip install -e ".[dev]"
208
+ ```
209
+
210
+ - Install pre-commit hooks:
211
+
212
+ ```bash
213
+ uv run pre-commit install
214
+ ```
215
+
216
+ - Run formatting and linting:
217
+
218
+ ```bash
219
+ uv run ruff format .
220
+ uv run ruff check --fix .
221
+ ```
222
+
223
+ - Run tests:
224
+
225
+ ```bash
226
+ uv run pytest -q
227
+ ```
228
+ ---
229
+
230
+ ## License
231
+
232
+ This project is licensed under the MIT License.
@@ -0,0 +1,190 @@
1
+ # MOOSE-FS
2
+
3
+ [![tests](https://github.com/CI4CB-lab/moosefs/actions/workflows/tests.yml/badge.svg)](https://github.com/CI4CB-lab/moosefs/actions/workflows/tests.yml)
4
+ [Documentation](https://CI4CB-lab.github.io/moosefs/)
5
+
6
+ ## Overview
7
+
8
+ MOOSE-FS is a feature selection library that leverages an ensemble-based approach to optimize both predictive performance and stability. By combining multiple feature selection methods, merging strategies, and evaluation metrics, it provides a highly flexible and tunable pipeline for both classification and regression tasks. The package automates feature selection across multiple iterations and uses Pareto optimization to identify the best feature subsets.
9
+
10
+ Users can define their feature selection process by:
11
+ - Selecting feature selection methods from predefined options or implementing custom ones.
12
+ - Choosing merging strategies to aggregate feature rankings.
13
+ - Specifying performance metrics to evaluate selected features.
14
+ - Configuring the number of features to select and the number of repetitions.
15
+ - Working with either **classification** or **regression** problems.
16
+
17
+ The library allows defining feature selectors, merging strategies, and metrics either as **class instances** or as **string identifiers**, which act as placeholders for built-in methods. The framework is modular and can be easily extended by adding new selection algorithms or merging strategies.
18
+
19
+ ---
20
+
21
+ ## Requirements
22
+
23
+ - **Python** 3.9 or higher
24
+ - **Dependencies**: Automatically installed from `pyproject.toml`.
25
+
26
+ ---
27
+
28
+ ## Installation
29
+
30
+ ### From Source
31
+
32
+ To install the package from source, run:
33
+
34
+ ```bash
35
+ pip install git+https://github.com/CI4CB-lab/moosefs.git
36
+ ```
37
+
38
+ Alternatively, clone the repository and install locally:
39
+
40
+ ```bash
41
+ git clone https://github.com/CI4CB-lab/moosefs.git
42
+ cd moosefs
43
+ pip install .
44
+ ```
45
+
46
+ ---
47
+
48
+ ## Using the Library
49
+
50
+ ### 1. Feature Selection Pipeline
51
+
52
+ The core of MOOSE-FS is the `FeatureSelectionPipeline`, which provides a fully configurable workflow for feature selection. Users can specify:
53
+ - Feature selection methods
54
+ - Merging strategy
55
+ - Evaluation metrics
56
+ - Task type (classification or regression)
57
+ - Number of features to select
58
+ - Number of repetitions
59
+
60
+ #### Example Usage
61
+
62
+ ```python
63
+ # `data` can be a single DataFrame (last column = target)
64
+ # or you can pass `X` and `y` separately.
65
+ # Assume `data` is a pandas DataFrame whose last column "label" holds the targets.
66
+ from moosefs import FeatureSelectionPipeline
67
+
68
+ fs_methods = ["f_statistic_selector", "random_forest_selector", "svm_selector"]
69
+ merging_strategy = "union_of_intersections_merger"
70
+
71
+ pipeline = FeatureSelectionPipeline(
72
+ X=data.drop(columns=["label"]),
73
+ y=data["label"],
74
+ fs_methods=fs_methods,
75
+ merging_strategy=merging_strategy,
76
+ num_repeats=5,
77
+ task="classification",
78
+ num_features_to_select=10,
79
+ )
80
+ results = pipeline.run()
81
+ ```
82
+
83
+ This runs feature selection, merges the results using the chosen strategy, and returns the best selected features.
84
+
85
+ ### 2. Extensibility
86
+
87
+ MOOSE-FS is designed to be easily extended. Users can implement custom:
88
+ - **Feature selection methods**: Define a new feature selector class and integrate it into the pipeline.
89
+ - **Merging strategies**: Implement a custom strategy to aggregate selected features.
90
+ - **Metrics**: Add new evaluation metrics tailored to specific tasks.
91
+
92
+ New methods can be used directly in the pipeline by passing the class or a corresponding identifier.
93
+
94
+ ---
95
+
96
+ ## Using the CLI
97
+
98
+ Once installed, the pipeline can also be run from the command line using:
99
+
100
+ ```bash
101
+ efs-pipeline
102
+ ```
103
+
104
+ This command executes `scripts/main.py` using parameters from `scripts/config.yml`. To use a different config file, pass its path as an argument:
105
+
106
+ ```bash
107
+ efs-pipeline path/to/your_config.yaml
108
+ ```
109
+
110
+ ### Example `config.yaml`
111
+
112
+ ```yaml
113
+ experiment:
114
+ name: "example_experiment"
115
+ results_path: "results/"
116
+ data_path: "data/input_data.csv"
117
+
118
+ preprocessing:
119
+ normalize: true
120
+ handle_missing: true
121
+
122
+ pipeline:
123
+ fs_methods: ["f_statistic_selector", "random_forest_selector"]
124
+ merging_strategy: "union_of_intersections_merger"
125
+ num_repeats: 5
126
+ task: "classification"
127
+ num_features_to_select: 10
128
+ ```
129
+
130
+ ### Results
131
+
132
+ The results are saved in a structured directory under `results/example_experiment/`, including:
133
+ - A **text file** summarizing the pipeline run.
134
+ - A **CSV file** containing the final results.
135
+
136
+ ---
137
+
138
+ ## Code Structure
139
+
140
+ - **`core/`**: Core modules for data processing, feature handling, Pareto analysis, and stability computation (performance and stability metrics live in `metrics/`).
141
+ - **`feature_selection_pipeline.py`**: Defines the main feature selection workflow.
142
+ - **`feature_selectors/`**: Implements feature selection methods (e.g., F-statistic, mutual information, RandomForest, SVM).
143
+ - **`merging_strategies/`**: Implements merging strategies such as Borda count and union of intersections.
144
+
145
+ ---
146
+
147
+ ## Contributing
148
+
149
+ Contributions are welcome! If you have ideas for improving MOOSE-FS, feel free to open an issue or submit a pull request.
150
+
151
+ ### Development (uv)
152
+
153
+ This project uses uv for local environments and dependency management. The library builds via the existing PEP 517 backend (hatchling); uv only manages the environment, installs, and command execution.
154
+
155
+ - Install/select Python 3.9+ and ensure `uv` is installed.
156
+ - Create a local virtual environment in `.venv`:
157
+
158
+ ```bash
159
+ uv venv --python 3.9
160
+ ```
161
+
162
+ - Install dev dependencies (editable):
163
+
164
+ ```bash
165
+ uv pip install -e ".[dev]"
166
+ ```
167
+
168
+ - Install pre-commit hooks:
169
+
170
+ ```bash
171
+ uv run pre-commit install
172
+ ```
173
+
174
+ - Run formatting and linting:
175
+
176
+ ```bash
177
+ uv run ruff format .
178
+ uv run ruff check --fix .
179
+ ```
180
+
181
+ - Run tests:
182
+
183
+ ```bash
184
+ uv run pytest -q
185
+ ```
186
+ ---
187
+
188
+ ## License
189
+
190
+ This project is licensed under the MIT License.