moose-fs 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_fs-0.1.0/.github/workflows/doc.yml +71 -0
- moose_fs-0.1.0/.github/workflows/publish.yml +53 -0
- moose_fs-0.1.0/.github/workflows/tests.yml +64 -0
- moose_fs-0.1.0/.gitignore +48 -0
- moose_fs-0.1.0/.pre-commit-config.yaml +6 -0
- moose_fs-0.1.0/LICENSE +21 -0
- moose_fs-0.1.0/PKG-INFO +232 -0
- moose_fs-0.1.0/README.md +190 -0
- moose_fs-0.1.0/docs/Makefile +20 -0
- moose_fs-0.1.0/docs/make.bat +36 -0
- moose_fs-0.1.0/docs/source/api_reference.rst +13 -0
- moose_fs-0.1.0/docs/source/conf.py +26 -0
- moose_fs-0.1.0/docs/source/core.rst +23 -0
- moose_fs-0.1.0/docs/source/feature_selection_pipeline.rst +8 -0
- moose_fs-0.1.0/docs/source/feature_selectors.rst +58 -0
- moose_fs-0.1.0/docs/source/index.rst +15 -0
- moose_fs-0.1.0/docs/source/installation.rst +27 -0
- moose_fs-0.1.0/docs/source/introduction.rst +22 -0
- moose_fs-0.1.0/docs/source/mergers.rst +33 -0
- moose_fs-0.1.0/docs/source/metrics.rst +13 -0
- moose_fs-0.1.0/docs/source/usage.rst +54 -0
- moose_fs-0.1.0/moosefs/__init__.py +6 -0
- moose_fs-0.1.0/moosefs/core/__init__.py +6 -0
- moose_fs-0.1.0/moosefs/core/data_processor.py +319 -0
- moose_fs-0.1.0/moosefs/core/feature.py +44 -0
- moose_fs-0.1.0/moosefs/core/novovicova.py +60 -0
- moose_fs-0.1.0/moosefs/core/pareto.py +90 -0
- moose_fs-0.1.0/moosefs/feature_selection_pipeline.py +548 -0
- moose_fs-0.1.0/moosefs/feature_selectors/__init__.py +26 -0
- moose_fs-0.1.0/moosefs/feature_selectors/base_selector.py +38 -0
- moose_fs-0.1.0/moosefs/feature_selectors/default_variance.py +21 -0
- moose_fs-0.1.0/moosefs/feature_selectors/elastic_net_selector.py +75 -0
- moose_fs-0.1.0/moosefs/feature_selectors/f_statistic_selector.py +42 -0
- moose_fs-0.1.0/moosefs/feature_selectors/lasso_selector.py +46 -0
- moose_fs-0.1.0/moosefs/feature_selectors/mrmr_selector.py +57 -0
- moose_fs-0.1.0/moosefs/feature_selectors/mutual_info_selector.py +45 -0
- moose_fs-0.1.0/moosefs/feature_selectors/random_forest_selector.py +48 -0
- moose_fs-0.1.0/moosefs/feature_selectors/svm_selector.py +50 -0
- moose_fs-0.1.0/moosefs/feature_selectors/variance_selectors.py +16 -0
- moose_fs-0.1.0/moosefs/feature_selectors/xgboost_selector.py +44 -0
- moose_fs-0.1.0/moosefs/merging_strategies/__init__.py +17 -0
- moose_fs-0.1.0/moosefs/merging_strategies/arithmetic_mean_merger.py +46 -0
- moose_fs-0.1.0/moosefs/merging_strategies/base_merger.py +64 -0
- moose_fs-0.1.0/moosefs/merging_strategies/borda_merger.py +46 -0
- moose_fs-0.1.0/moosefs/merging_strategies/consensus_merger.py +80 -0
- moose_fs-0.1.0/moosefs/merging_strategies/l2_norm_merger.py +42 -0
- moose_fs-0.1.0/moosefs/merging_strategies/union_of_intersections_merger.py +89 -0
- moose_fs-0.1.0/moosefs/metrics/__init__.py +23 -0
- moose_fs-0.1.0/moosefs/metrics/performance_metrics.py +239 -0
- moose_fs-0.1.0/moosefs/metrics/stability_metrics.py +49 -0
- moose_fs-0.1.0/moosefs/utils.py +161 -0
- moose_fs-0.1.0/pyproject.toml +114 -0
- moose_fs-0.1.0/scripts/config.yml +92 -0
- moose_fs-0.1.0/scripts/main.py +163 -0
- moose_fs-0.1.0/scripts/utils.py +186 -0
- moose_fs-0.1.0/tests/__init__.py +0 -0
- moose_fs-0.1.0/tests/config_test.yml +34 -0
- moose_fs-0.1.0/tests/test_base_merger_validation.py +16 -0
- moose_fs-0.1.0/tests/test_consensus_merger.py +33 -0
- moose_fs-0.1.0/tests/test_data_processor.py +68 -0
- moose_fs-0.1.0/tests/test_feature.py +32 -0
- moose_fs-0.1.0/tests/test_fs_methods.py +181 -0
- moose_fs-0.1.0/tests/test_main_script.py +75 -0
- moose_fs-0.1.0/tests/test_merging.py +346 -0
- moose_fs-0.1.0/tests/test_metrics.py +86 -0
- moose_fs-0.1.0/tests/test_novovicova.py +74 -0
- moose_fs-0.1.0/tests/test_pareto.py +107 -0
- moose_fs-0.1.0/tests/test_pipeline_internals.py +91 -0
- moose_fs-0.1.0/tests/test_pipeline_method.py +81 -0
- moose_fs-0.1.0/tests/test_pipeline_process.py +143 -0
- moose_fs-0.1.0/tests/test_reproducibility.py +68 -0
- moose_fs-0.1.0/tests/test_stability_metrics.py +28 -0
- moose_fs-0.1.0/tests/test_utils.py +34 -0
- moose_fs-0.1.0/tests/test_variance_default.py +20 -0
- moose_fs-0.1.0/tutorials/advanced_example.ipynb +256 -0
- moose_fs-0.1.0/tutorials/getting_started.ipynb +231 -0
- moose_fs-0.1.0/tutorials/simple_example.ipynb +213 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
name: Documentation
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- main
|
|
10
|
+
|
|
11
|
+
concurrency:
|
|
12
|
+
group: docs-${{ github.workflow }}-${{ github.ref }}
|
|
13
|
+
cancel-in-progress: true
|
|
14
|
+
|
|
15
|
+
permissions:
|
|
16
|
+
contents: write
|
|
17
|
+
pages: write
|
|
18
|
+
id-token: write
|
|
19
|
+
|
|
20
|
+
jobs:
|
|
21
|
+
build_and_deploy:
|
|
22
|
+
name: Build Docs
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
|
|
25
|
+
steps:
|
|
26
|
+
- name: Checkout code
|
|
27
|
+
uses: actions/checkout@v4
|
|
28
|
+
|
|
29
|
+
- name: Set up uv and Python
|
|
30
|
+
uses: astral-sh/setup-uv@v3
|
|
31
|
+
with:
|
|
32
|
+
python-version: '3.11'
|
|
33
|
+
|
|
34
|
+
- name: Create project venv
|
|
35
|
+
run: uv venv --python 3.11
|
|
36
|
+
|
|
37
|
+
- name: Install docs dependencies
|
|
38
|
+
run: |
|
|
39
|
+
uv pip install -e . -e .[docs]
|
|
40
|
+
uv pip install build
|
|
41
|
+
|
|
42
|
+
- name: Ensure root directory is included in PYTHONPATH
|
|
43
|
+
run: |
|
|
44
|
+
export PYTHONPATH=$PYTHONPATH:$(pwd)
|
|
45
|
+
echo $PYTHONPATH
|
|
46
|
+
|
|
47
|
+
- name: Build package
|
|
48
|
+
run: uv run python -m build
|
|
49
|
+
|
|
50
|
+
- name: Clean build directory
|
|
51
|
+
run: |
|
|
52
|
+
rm -rf docs/build/* || true
|
|
53
|
+
|
|
54
|
+
- name: Build docs (sphinx)
|
|
55
|
+
if: ${{ hashFiles('docs/source/conf.py') != '' }}
|
|
56
|
+
run: |
|
|
57
|
+
export PYTHONPATH=$(pwd)
|
|
58
|
+
echo "Building docs with PYTHONPATH: $PYTHONPATH"
|
|
59
|
+
uv run sphinx-build -b html docs/source docs/build/html -v
|
|
60
|
+
|
|
61
|
+
- name: Deploy to GitHub Pages
|
|
62
|
+
if: ${{ hashFiles('docs/source/conf.py') != '' && github.event_name == 'push' }}
|
|
63
|
+
uses: peaceiris/actions-gh-pages@v3
|
|
64
|
+
with:
|
|
65
|
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
66
|
+
publish_dir: ./docs/build/html
|
|
67
|
+
|
|
68
|
+
- name: Verify Imports
|
|
69
|
+
run: |
|
|
70
|
+
uv run python -c "import moosefs.core.pareto; print('moosefs.core.pareto imported successfully')"
|
|
71
|
+
uv run python -c "import moosefs.feature_selection_pipeline; print('moosefs.feature_selection_pipeline imported successfully')"
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# This workflow builds the distribution and uploads to PyPI.
|
|
2
|
+
|
|
3
|
+
name: Publish to PyPI
|
|
4
|
+
|
|
5
|
+
on:
|
|
6
|
+
release:
|
|
7
|
+
types: [published]
|
|
8
|
+
|
|
9
|
+
env:
|
|
10
|
+
PYTHON_VERSION: '3.11'
|
|
11
|
+
|
|
12
|
+
permissions:
|
|
13
|
+
contents: write
|
|
14
|
+
|
|
15
|
+
jobs:
|
|
16
|
+
build-and-publish:
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- name: Checkout code
|
|
21
|
+
uses: actions/checkout@v4
|
|
22
|
+
|
|
23
|
+
- name: Set up uv and Python 🐍
|
|
24
|
+
uses: astral-sh/setup-uv@v3
|
|
25
|
+
with:
|
|
26
|
+
python-version: ${{ env.PYTHON_VERSION }}
|
|
27
|
+
|
|
28
|
+
- name: Create project venv
|
|
29
|
+
run: uv venv --python ${{ env.PYTHON_VERSION }}
|
|
30
|
+
|
|
31
|
+
- name: Install build tools
|
|
32
|
+
run: uv pip install -e . -e .[dev]
|
|
33
|
+
|
|
34
|
+
- name: Build binary wheel and source tarball 🔨
|
|
35
|
+
run: uv run python -m build
|
|
36
|
+
|
|
37
|
+
- name: Verify distribution
|
|
38
|
+
run: uv run twine check dist/*
|
|
39
|
+
|
|
40
|
+
- name: Show built artifacts
|
|
41
|
+
run: ls -l dist
|
|
42
|
+
|
|
43
|
+
- name: Upload built artifacts
|
|
44
|
+
uses: actions/upload-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/*
|
|
48
|
+
retention-days: 7
|
|
49
|
+
|
|
50
|
+
- name: Publish distribution 📦 to PyPI
|
|
51
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
52
|
+
with:
|
|
53
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
name: tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
- '**'
|
|
8
|
+
pull_request:
|
|
9
|
+
|
|
10
|
+
concurrency:
|
|
11
|
+
group: tests-${{ github.workflow }}-${{ github.ref }}
|
|
12
|
+
cancel-in-progress: true
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
test:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
strategy:
|
|
18
|
+
fail-fast: false
|
|
19
|
+
matrix:
|
|
20
|
+
python-version: [ '3.9', '3.10', '3.11', '3.12' ]
|
|
21
|
+
|
|
22
|
+
steps:
|
|
23
|
+
- name: Checkout code
|
|
24
|
+
uses: actions/checkout@v4
|
|
25
|
+
|
|
26
|
+
- name: Set up uv and Python
|
|
27
|
+
uses: astral-sh/setup-uv@v3
|
|
28
|
+
with:
|
|
29
|
+
python-version: ${{ matrix.python-version }}
|
|
30
|
+
|
|
31
|
+
- name: Create project venv
|
|
32
|
+
run: uv venv --python ${{ matrix.python-version }}
|
|
33
|
+
|
|
34
|
+
- name: Install dev dependencies (editable)
|
|
35
|
+
run: uv pip install -e . -e .[dev]
|
|
36
|
+
|
|
37
|
+
- name: Verify imports
|
|
38
|
+
run: uv run python -c "import moosefs; print('import ok')"
|
|
39
|
+
|
|
40
|
+
- name: Ruff format check
|
|
41
|
+
run: uv run ruff format --check .
|
|
42
|
+
|
|
43
|
+
- name: Ruff lint
|
|
44
|
+
run: uv run ruff check --output-format=github .
|
|
45
|
+
|
|
46
|
+
- name: Run pytest
|
|
47
|
+
run: uv run pytest -q --maxfail=1 --disable-warnings
|
|
48
|
+
|
|
49
|
+
- name: Upload junit/coverage (if any)
|
|
50
|
+
if: always()
|
|
51
|
+
uses: actions/upload-artifact@v4
|
|
52
|
+
with:
|
|
53
|
+
name: test-artifacts-py${{ matrix.python-version }}
|
|
54
|
+
path: |
|
|
55
|
+
./.pytest_cache/**
|
|
56
|
+
./coverage.xml
|
|
57
|
+
if-no-files-found: ignore
|
|
58
|
+
retention-days: 7
|
|
59
|
+
|
|
60
|
+
- name: Build package
|
|
61
|
+
run: uv run python -m build
|
|
62
|
+
|
|
63
|
+
- name: Verify distribution
|
|
64
|
+
run: uv run twine check dist/*
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
dist/
|
|
13
|
+
*.egg-info/
|
|
14
|
+
eggs/
|
|
15
|
+
lib/
|
|
16
|
+
lib64/
|
|
17
|
+
parts/
|
|
18
|
+
sdist/
|
|
19
|
+
var/
|
|
20
|
+
wheels/
|
|
21
|
+
pip-wheel-metadata/
|
|
22
|
+
|
|
23
|
+
# Virtual environments
|
|
24
|
+
.venv/
|
|
25
|
+
venv/
|
|
26
|
+
ENV/
|
|
27
|
+
env/
|
|
28
|
+
|
|
29
|
+
# Test, coverage, and cache
|
|
30
|
+
.pytest_cache/
|
|
31
|
+
.mypy_cache/
|
|
32
|
+
.ruff_cache/
|
|
33
|
+
.coverage
|
|
34
|
+
coverage.xml
|
|
35
|
+
|
|
36
|
+
# IDE/editor settings
|
|
37
|
+
.vscode/
|
|
38
|
+
.idea/
|
|
39
|
+
.history/
|
|
40
|
+
|
|
41
|
+
# OS files
|
|
42
|
+
.DS_Store
|
|
43
|
+
|
|
44
|
+
# Project-specific
|
|
45
|
+
results/
|
|
46
|
+
tests/test_experiment/
|
|
47
|
+
docs/build/
|
|
48
|
+
uv.lock
|
moose_fs-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 CI4CB-lab
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
moose_fs-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: moose-fs
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MOOSE-FS: Multi-Objective Optimized Ensemble Feature Selection
|
|
5
|
+
Project-URL: Repository, https://github.com/CI4CB-lab/moosefs
|
|
6
|
+
Project-URL: Documentation, https://CI4CB-lab.github.io/moosefs/
|
|
7
|
+
Author-email: Arthur Babey <arthur.babey@heig-vd.ch>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
14
|
+
Requires-Python: >=3.9
|
|
15
|
+
Requires-Dist: joblib
|
|
16
|
+
Requires-Dist: mrmr-selection
|
|
17
|
+
Requires-Dist: numpy
|
|
18
|
+
Requires-Dist: pandas
|
|
19
|
+
Requires-Dist: pyyaml
|
|
20
|
+
Requires-Dist: ranky
|
|
21
|
+
Requires-Dist: scikit-learn>=1.5.0
|
|
22
|
+
Requires-Dist: scipy>=1.11
|
|
23
|
+
Requires-Dist: xgboost
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: build>=1; extra == 'dev'
|
|
26
|
+
Requires-Dist: coverage[toml]; extra == 'dev'
|
|
27
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff>=0.14.2; extra == 'dev'
|
|
30
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == 'dev'
|
|
31
|
+
Requires-Dist: sphinx-rtd-theme; extra == 'dev'
|
|
32
|
+
Requires-Dist: sphinx>=7; extra == 'dev'
|
|
33
|
+
Requires-Dist: twine>=5; extra == 'dev'
|
|
34
|
+
Provides-Extra: docs
|
|
35
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == 'docs'
|
|
36
|
+
Requires-Dist: sphinx-rtd-theme; extra == 'docs'
|
|
37
|
+
Requires-Dist: sphinx>=7; extra == 'docs'
|
|
38
|
+
Provides-Extra: test
|
|
39
|
+
Requires-Dist: coverage[toml]; extra == 'test'
|
|
40
|
+
Requires-Dist: pytest>=7; extra == 'test'
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
|
|
43
|
+
# MOOSE-FS
|
|
44
|
+
|
|
45
|
+
[![tests](https://github.com/CI4CB-lab/moosefs/actions/workflows/tests.yml/badge.svg)](https://github.com/CI4CB-lab/moosefs/actions/workflows/tests.yml)
|
|
46
|
+
[Documentation](https://CI4CB-lab.github.io/moosefs/)
|
|
47
|
+
|
|
48
|
+
## Overview
|
|
49
|
+
|
|
50
|
+
MOOSE-FS is a feature selection library that leverages an ensemble-based approach to optimize both predictive performance and stability. By combining multiple feature selection methods, merging strategies, and evaluation metrics, it provides a highly flexible and tunable pipeline for both classification and regression tasks. The package automates feature selection across multiple iterations and uses Pareto optimization to identify the best feature subsets.
|
|
51
|
+
|
|
52
|
+
Users can define their feature selection process by:
|
|
53
|
+
- Selecting feature selection methods from predefined options or implementing custom ones.
|
|
54
|
+
- Choosing merging strategies to aggregate feature rankings.
|
|
55
|
+
- Specifying performance metrics to evaluate selected features.
|
|
56
|
+
- Configuring the number of features to select and the number of repetitions.
|
|
57
|
+
- Working with either **classification** or **regression** problems.
|
|
58
|
+
|
|
59
|
+
The library allows defining feature selectors, merging strategies, and metrics either as **class instances** or as **string identifiers**, which act as placeholders for built-in methods. The framework is modular and can be easily extended by adding new selection algorithms or merging strategies.
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Requirements
|
|
64
|
+
|
|
65
|
+
- **Python** 3.9 or higher
|
|
66
|
+
- **Dependencies**: Automatically installed from `pyproject.toml`.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Installation
|
|
71
|
+
|
|
72
|
+
### From Source
|
|
73
|
+
|
|
74
|
+
To install the package from source, run:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install git+https://github.com/CI4CB-lab/moosefs.git
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Alternatively, clone the repository and install locally:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
git clone https://github.com/CI4CB-lab/moosefs.git
|
|
84
|
+
cd moosefs
|
|
85
|
+
pip install .
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Using the Library
|
|
91
|
+
|
|
92
|
+
### 1. Feature Selection Pipeline
|
|
93
|
+
|
|
94
|
+
The core of MOOSE-FS is the `FeatureSelectionPipeline`, which provides a fully configurable workflow for feature selection. Users can specify:
|
|
95
|
+
- Feature selection methods
|
|
96
|
+
- Merging strategy
|
|
97
|
+
- Evaluation metrics
|
|
98
|
+
- Task type (classification or regression)
|
|
99
|
+
- Number of features to select
|
|
100
|
+
- Number of repetitions
|
|
101
|
+
|
|
102
|
+
#### Example Usage
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
# `data` can be a single DataFrame (last column = target)
|
|
106
|
+
# or you can pass `X` and `y` separately.
|
|
107
|
+
# Assume `data` is a pandas DataFrame whose last column "label" holds the targets.
|
|
108
|
+
from moosefs import FeatureSelectionPipeline
|
|
109
|
+
|
|
110
|
+
fs_methods = ["f_statistic_selector", "random_forest_selector", "svm_selector"]
|
|
111
|
+
merging_strategy = "union_of_intersections_merger"
|
|
112
|
+
|
|
113
|
+
pipeline = FeatureSelectionPipeline(
|
|
114
|
+
X=data.drop(columns=["label"]),
|
|
115
|
+
y=data["label"],
|
|
116
|
+
fs_methods=fs_methods,
|
|
117
|
+
merging_strategy=merging_strategy,
|
|
118
|
+
num_repeats=5,
|
|
119
|
+
task="classification",
|
|
120
|
+
num_features_to_select=10,
|
|
121
|
+
)
|
|
122
|
+
results = pipeline.run()
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
This runs feature selection, merges the results using the chosen strategy, and returns the best selected features.
|
|
126
|
+
|
|
127
|
+
### 2. Extensibility
|
|
128
|
+
|
|
129
|
+
MOOSE-FS is designed to be easily extended. Users can implement custom:
|
|
130
|
+
- **Feature selection methods**: Define a new feature selector class and integrate it into the pipeline.
|
|
131
|
+
- **Merging strategies**: Implement a custom strategy to aggregate selected features.
|
|
132
|
+
- **Metrics**: Add new evaluation metrics tailored to specific tasks.
|
|
133
|
+
|
|
134
|
+
New methods can be used directly in the pipeline by passing the class or a corresponding identifier.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Using the CLI
|
|
139
|
+
|
|
140
|
+
Once installed, the pipeline can also be run from the command line using:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
efs-pipeline
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
This command executes `scripts/main.py` using parameters from `scripts/config.yml`. Users can specify a different config file:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
efs-pipeline path/to/your_config.yaml
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Example `config.yaml`
|
|
153
|
+
|
|
154
|
+
```yaml
|
|
155
|
+
experiment:
|
|
156
|
+
name: "example_experiment"
|
|
157
|
+
results_path: "results/"
|
|
158
|
+
data_path: "data/input_data.csv"
|
|
159
|
+
|
|
160
|
+
preprocessing:
|
|
161
|
+
normalize: true
|
|
162
|
+
handle_missing: true
|
|
163
|
+
|
|
164
|
+
pipeline:
|
|
165
|
+
fs_methods: ["f_statistic_selector", "random_forest_selector"]
|
|
166
|
+
merging_strategy: "union_of_intersections_merger"
|
|
167
|
+
num_repeats: 5
|
|
168
|
+
task: "classification"
|
|
169
|
+
num_features_to_select: 10
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Results
|
|
173
|
+
|
|
174
|
+
The results are saved in a structured directory under `results/example_experiment/`, including:
|
|
175
|
+
- A **text file** summarizing the pipeline run.
|
|
176
|
+
- A **CSV file** containing the final results.
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Code Structure
|
|
181
|
+
|
|
182
|
+
- **`core/`**: Core modules for data processing, metrics, and stability computation.
|
|
183
|
+
- **`feature_selection_pipeline.py`**: Defines the main feature selection workflow.
|
|
184
|
+
- **`feature_selectors/`**: Implements feature selection methods (e.g., F-statistic, mutual information, RandomForest, SVM).
|
|
185
|
+
- **`merging_strategies/`**: Implements merging strategies such as Borda count and union of intersections.
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Contributing
|
|
190
|
+
|
|
191
|
+
Contributions are welcome! If you have ideas for improving MOOSE-FS, feel free to open an issue or submit a pull request.
|
|
192
|
+
|
|
193
|
+
### Development (uv)
|
|
194
|
+
|
|
195
|
+
This project uses uv for local environments and dependency management. The library builds via the existing PEP 517 backend (hatchling); uv only manages the environment, installs, and command execution.
|
|
196
|
+
|
|
197
|
+
- Install/select Python 3.9+ and ensure `uv` is installed.
|
|
198
|
+
- Create a local virtual environment in `.venv`:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
uv venv --python 3.9
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
- Install dev dependencies (editable):
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
uv pip install -e ".[dev]"
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
- Install pre-commit hooks:
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
uv run pre-commit install
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
- Run formatting and linting:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
uv run ruff format .
|
|
220
|
+
uv run ruff check --fix .
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
- Run tests:
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
uv run pytest -q
|
|
227
|
+
```
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## License
|
|
231
|
+
|
|
232
|
+
This project is licensed under the MIT License.
|
moose_fs-0.1.0/README.md
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# MOOSE-FS
|
|
2
|
+
|
|
3
|
+
[![tests](https://github.com/CI4CB-lab/moosefs/actions/workflows/tests.yml/badge.svg)](https://github.com/CI4CB-lab/moosefs/actions/workflows/tests.yml)
|
|
4
|
+
[Documentation](https://CI4CB-lab.github.io/moosefs/)
|
|
5
|
+
|
|
6
|
+
## Overview
|
|
7
|
+
|
|
8
|
+
MOOSE-FS is a feature selection library that leverages an ensemble-based approach to optimize both predictive performance and stability. By combining multiple feature selection methods, merging strategies, and evaluation metrics, it provides a highly flexible and tunable pipeline for both classification and regression tasks. The package automates feature selection across multiple iterations and uses Pareto optimization to identify the best feature subsets.
|
|
9
|
+
|
|
10
|
+
Users can define their feature selection process by:
|
|
11
|
+
- Selecting feature selection methods from predefined options or implementing custom ones.
|
|
12
|
+
- Choosing merging strategies to aggregate feature rankings.
|
|
13
|
+
- Specifying performance metrics to evaluate selected features.
|
|
14
|
+
- Configuring the number of features to select and the number of repetitions.
|
|
15
|
+
- Working with either **classification** or **regression** problems.
|
|
16
|
+
|
|
17
|
+
The library allows defining feature selectors, merging strategies, and metrics either as **class instances** or as **string identifiers**, which act as placeholders for built-in methods. The framework is modular and can be easily extended by adding new selection algorithms or merging strategies.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Requirements
|
|
22
|
+
|
|
23
|
+
- **Python** 3.9 or higher
|
|
24
|
+
- **Dependencies**: Automatically installed from `pyproject.toml`.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
### From Source
|
|
31
|
+
|
|
32
|
+
To install the package from source, run:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install git+https://github.com/CI4CB-lab/moosefs.git
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Alternatively, clone the repository and install locally:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
git clone https://github.com/CI4CB-lab/moosefs.git
|
|
42
|
+
cd moosefs
|
|
43
|
+
pip install .
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Using the Library
|
|
49
|
+
|
|
50
|
+
### 1. Feature Selection Pipeline
|
|
51
|
+
|
|
52
|
+
The core of MOOSE-FS is the `FeatureSelectionPipeline`, which provides a fully configurable workflow for feature selection. Users can specify:
|
|
53
|
+
- Feature selection methods
|
|
54
|
+
- Merging strategy
|
|
55
|
+
- Evaluation metrics
|
|
56
|
+
- Task type (classification or regression)
|
|
57
|
+
- Number of features to select
|
|
58
|
+
- Number of repetitions
|
|
59
|
+
|
|
60
|
+
#### Example Usage
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
# `data` can be a single DataFrame (last column = target)
|
|
64
|
+
# or you can pass `X` and `y` separately.
|
|
65
|
+
# Assume `data` is a pandas DataFrame whose last column "label" holds the targets.
|
|
66
|
+
from moosefs import FeatureSelectionPipeline
|
|
67
|
+
|
|
68
|
+
fs_methods = ["f_statistic_selector", "random_forest_selector", "svm_selector"]
|
|
69
|
+
merging_strategy = "union_of_intersections_merger"
|
|
70
|
+
|
|
71
|
+
pipeline = FeatureSelectionPipeline(
|
|
72
|
+
X=data.drop(columns=["label"]),
|
|
73
|
+
y=data["label"],
|
|
74
|
+
fs_methods=fs_methods,
|
|
75
|
+
merging_strategy=merging_strategy,
|
|
76
|
+
num_repeats=5,
|
|
77
|
+
task="classification",
|
|
78
|
+
num_features_to_select=10,
|
|
79
|
+
)
|
|
80
|
+
results = pipeline.run()
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
This runs feature selection, merges the results using the chosen strategy, and returns the best selected features.
|
|
84
|
+
|
|
85
|
+
### 2. Extensibility
|
|
86
|
+
|
|
87
|
+
MOOSE-FS is designed to be easily extended. Users can implement custom:
|
|
88
|
+
- **Feature selection methods**: Define a new feature selector class and integrate it into the pipeline.
|
|
89
|
+
- **Merging strategies**: Implement a custom strategy to aggregate selected features.
|
|
90
|
+
- **Metrics**: Add new evaluation metrics tailored to specific tasks.
|
|
91
|
+
|
|
92
|
+
New methods can be used directly in the pipeline by passing the class or a corresponding identifier.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Using the CLI
|
|
97
|
+
|
|
98
|
+
Once installed, the pipeline can also be run from the command line using:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
efs-pipeline
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
This command executes `scripts/main.py` using parameters from `scripts/config.yml`. Users can specify a different config file:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
efs-pipeline path/to/your_config.yaml
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Example `config.yaml`
|
|
111
|
+
|
|
112
|
+
```yaml
|
|
113
|
+
experiment:
|
|
114
|
+
name: "example_experiment"
|
|
115
|
+
results_path: "results/"
|
|
116
|
+
data_path: "data/input_data.csv"
|
|
117
|
+
|
|
118
|
+
preprocessing:
|
|
119
|
+
normalize: true
|
|
120
|
+
handle_missing: true
|
|
121
|
+
|
|
122
|
+
pipeline:
|
|
123
|
+
fs_methods: ["f_statistic_selector", "random_forest_selector"]
|
|
124
|
+
merging_strategy: "union_of_intersections_merger"
|
|
125
|
+
num_repeats: 5
|
|
126
|
+
task: "classification"
|
|
127
|
+
num_features_to_select: 10
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Results
|
|
131
|
+
|
|
132
|
+
The results are saved in a structured directory under `results/example_experiment/`, including:
|
|
133
|
+
- A **text file** summarizing the pipeline run.
|
|
134
|
+
- A **CSV file** containing the final results.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Code Structure
|
|
139
|
+
|
|
140
|
+
- **`core/`**: Core modules for data processing, metrics, and stability computation.
|
|
141
|
+
- **`feature_selection_pipeline.py`**: Defines the main feature selection workflow.
|
|
142
|
+
- **`feature_selectors/`**: Implements feature selection methods (e.g., F-statistic, mutual information, RandomForest, SVM).
|
|
143
|
+
- **`merging_strategies/`**: Implements merging strategies such as Borda count and union of intersections.
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Contributing
|
|
148
|
+
|
|
149
|
+
Contributions are welcome! If you have ideas for improving MOOSE-FS, feel free to open an issue or submit a pull request.
|
|
150
|
+
|
|
151
|
+
### Development (uv)
|
|
152
|
+
|
|
153
|
+
This project uses uv for local environments and dependency management. The library builds via the existing PEP 517 backend (hatchling); uv only manages the environment, installs, and command execution.
|
|
154
|
+
|
|
155
|
+
- Install/select Python 3.9+ and ensure `uv` is installed.
|
|
156
|
+
- Create a local virtual environment in `.venv`:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
uv venv --python 3.9
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
- Install dev dependencies (editable):
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
uv pip install -e ".[dev]"
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
- Install pre-commit hooks:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
uv run pre-commit install
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
- Run formatting and linting:
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
uv run ruff format .
|
|
178
|
+
uv run ruff check --fix .
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
- Run tests:
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
uv run pytest -q
|
|
185
|
+
```
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## License
|
|
189
|
+
|
|
190
|
+
This project is licensed under the MIT License.
|