fcvopt 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fcvopt-0.4.0/.claude/settings.local.json +20 -0
- fcvopt-0.4.0/.dockerignore +6 -0
- fcvopt-0.4.0/.github/workflows/docs.yml +77 -0
- fcvopt-0.4.0/.gitignore +58 -0
- fcvopt-0.4.0/CLAUDE.md +186 -0
- fcvopt-0.4.0/Dockerfile +24 -0
- fcvopt-0.4.0/LICENSE +21 -0
- fcvopt-0.4.0/PKG-INFO +150 -0
- fcvopt-0.4.0/README.md +91 -0
- fcvopt-0.4.0/docs/Makefile +20 -0
- fcvopt-0.4.0/docs/make.bat +35 -0
- fcvopt-0.4.0/docs/requirements.txt +4 -0
- fcvopt-0.4.0/docs/source/conf.py +93 -0
- fcvopt-0.4.0/docs/source/configspace.rst +30 -0
- fcvopt-0.4.0/docs/source/crossvalidation.rst +79 -0
- fcvopt-0.4.0/docs/source/examples/01_Introduction_to_FCVOpt.ipynb +361 -0
- fcvopt-0.4.0/docs/source/examples/02_Tuning_Lightgbm_Sklearn_API.ipynb +324 -0
- fcvopt-0.4.0/docs/source/examples/03_Extending_CVobjective.ipynb +401 -0
- fcvopt-0.4.0/docs/source/examples/04_Standard_BO.ipynb +281 -0
- fcvopt-0.4.0/docs/source/examples/04_Tuning_TabularResNet.ipynb +386 -0
- fcvopt-0.4.0/docs/source/examples/index.rst +13 -0
- fcvopt-0.4.0/docs/source/examples.rst +344 -0
- fcvopt-0.4.0/docs/source/index.rst +117 -0
- fcvopt-0.4.0/docs/source/mlflow_integration.rst +147 -0
- fcvopt-0.4.0/docs/source/models.rst +96 -0
- fcvopt-0.4.0/docs/source/optimizers.rst +36 -0
- fcvopt-0.4.0/docs/source/technical_details.rst +139 -0
- fcvopt-0.4.0/examples/01_Introduction_to_FCVOpt.ipynb +361 -0
- fcvopt-0.4.0/examples/02_Tuning_Lightgbm_Sklearn_API.ipynb +324 -0
- fcvopt-0.4.0/examples/03_Extending_CVobjective.ipynb +401 -0
- fcvopt-0.4.0/examples/04_Standard_BO.ipynb +281 -0
- fcvopt-0.4.0/examples/04_Tuning_TabularResNet.ipynb +386 -0
- fcvopt-0.4.0/experiments/README.md +22 -0
- fcvopt-0.4.0/experiments/boosted_reg/README.md +38 -0
- fcvopt-0.4.0/experiments/boosted_reg/run_xgb_fcvopt.py +100 -0
- fcvopt-0.4.0/experiments/figures_auto/fig-rf-madelon.png +0 -0
- fcvopt-0.4.0/experiments/gen_torchscript_models.py +51 -0
- fcvopt-0.4.0/experiments/generate_figures.py +297 -0
- fcvopt-0.4.0/experiments/mlp/README.md +71 -0
- fcvopt-0.4.0/experiments/mlp/run_fcvopt.py +137 -0
- fcvopt-0.4.0/experiments/mlp/run_optuna.py +117 -0
- fcvopt-0.4.0/experiments/mlp/run_smac.py +124 -0
- fcvopt-0.4.0/experiments/mlp/true_eval.py +229 -0
- fcvopt-0.4.0/experiments/reproduce_rf.sh +55 -0
- fcvopt-0.4.0/experiments/rf_high_dim/README.md +71 -0
- fcvopt-0.4.0/experiments/rf_high_dim/config.py +21 -0
- fcvopt-0.4.0/experiments/rf_high_dim/run_fcvopt.py +124 -0
- fcvopt-0.4.0/experiments/rf_high_dim/run_optuna.py +117 -0
- fcvopt-0.4.0/experiments/rf_high_dim/run_smac.py +117 -0
- fcvopt-0.4.0/experiments/rf_high_dim/true_eval.py +188 -0
- fcvopt-0.4.0/experiments/tab_resnet/run_fcvopt.py +131 -0
- fcvopt-0.4.0/experiments/tab_resnet/run_optuna.py +120 -0
- fcvopt-0.4.0/experiments/tab_resnet/run_smac.py +118 -0
- fcvopt-0.4.0/experiments/tab_resnet/true_eval.py +234 -0
- fcvopt-0.4.0/experiments/xgb_class/README.md +59 -0
- fcvopt-0.4.0/experiments/xgb_class/run_xgb_fcvopt.py +132 -0
- fcvopt-0.4.0/experiments/xgb_class/run_xgb_optuna.py +123 -0
- fcvopt-0.4.0/experiments/xgb_class/run_xgb_smac.py +114 -0
- fcvopt-0.4.0/experiments/xgb_class/true_eval.py +219 -0
- fcvopt-0.4.0/fcvopt/__init__.py +19 -0
- fcvopt-0.4.0/fcvopt/configspace.py +122 -0
- fcvopt-0.4.0/fcvopt/crossvalidation/__init__.py +4 -0
- fcvopt-0.4.0/fcvopt/crossvalidation/cvobjective.py +205 -0
- fcvopt-0.4.0/fcvopt/crossvalidation/mlp_cvobj.py +339 -0
- fcvopt-0.4.0/fcvopt/crossvalidation/optuna_obj.py +57 -0
- fcvopt-0.4.0/fcvopt/crossvalidation/resnet_cvobj.py +312 -0
- fcvopt-0.4.0/fcvopt/crossvalidation/sklearn_cvobj.py +295 -0
- fcvopt-0.4.0/fcvopt/fit/__init__.py +1 -0
- fcvopt-0.4.0/fcvopt/fit/mll_scipy.py +287 -0
- fcvopt-0.4.0/fcvopt/kernels/__init__.py +5 -0
- fcvopt-0.4.0/fcvopt/kernels/constant_kernel.py +11 -0
- fcvopt-0.4.0/fcvopt/kernels/hamming_kernel.py +75 -0
- fcvopt-0.4.0/fcvopt/kernels/matern.py +8 -0
- fcvopt-0.4.0/fcvopt/kernels/multitaskkernel.py +78 -0
- fcvopt-0.4.0/fcvopt/models/__init__.py +2 -0
- fcvopt-0.4.0/fcvopt/models/gpregression.py +119 -0
- fcvopt-0.4.0/fcvopt/models/hmgp.py +269 -0
- fcvopt-0.4.0/fcvopt/models/multitaskgp.py +125 -0
- fcvopt-0.4.0/fcvopt/models/priors.py +52 -0
- fcvopt-0.4.0/fcvopt/models/warp.py +100 -0
- fcvopt-0.4.0/fcvopt/optimizers/__init__.py +2 -0
- fcvopt-0.4.0/fcvopt/optimizers/active_learning.py +204 -0
- fcvopt-0.4.0/fcvopt/optimizers/bayes_opt.py +1379 -0
- fcvopt-0.4.0/fcvopt/optimizers/cvrandopt.py +40 -0
- fcvopt-0.4.0/fcvopt/optimizers/fcvopt.py +478 -0
- fcvopt-0.4.0/fcvopt/optimizers/mtbo_cv.py +231 -0
- fcvopt-0.4.0/fcvopt/optimizers/optimize_acq.py +103 -0
- fcvopt-0.4.0/fcvopt/py.typed +2 -0
- fcvopt-0.4.0/fcvopt/util/__init__.py +1 -0
- fcvopt-0.4.0/fcvopt/util/samplers.py +46 -0
- fcvopt-0.4.0/pyproject.toml +100 -0
- fcvopt-0.4.0/tests/test_bayes_opt.py +671 -0
- fcvopt-0.4.0/tests/test_fcvopt.py +923 -0
- fcvopt-0.4.0/venv_setup.sh +19 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(source:*)",
|
|
5
|
+
"Bash(python3:*)",
|
|
6
|
+
"Bash(bash:*)",
|
|
7
|
+
"Bash(tree:*)",
|
|
8
|
+
"Bash(pip install:*)",
|
|
9
|
+
"Bash(python -m build:*)",
|
|
10
|
+
"Bash(twine check:*)",
|
|
11
|
+
"WebSearch",
|
|
12
|
+
"WebFetch(domain:pypi.org)",
|
|
13
|
+
"Bash(python -m unittest:*)",
|
|
14
|
+
"Bash(git rm:*)",
|
|
15
|
+
"Bash(find:*)"
|
|
16
|
+
],
|
|
17
|
+
"deny": [],
|
|
18
|
+
"ask": []
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
name: Build and Deploy Documentation
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ master, main ]
|
|
6
|
+
paths:
|
|
7
|
+
- 'docs/**'
|
|
8
|
+
- '.github/workflows/docs.yml'
|
|
9
|
+
pull_request:
|
|
10
|
+
branches: [ master, main ]
|
|
11
|
+
paths:
|
|
12
|
+
- 'docs/source/**'
|
|
13
|
+
- '.github/workflows/docs.yml'
|
|
14
|
+
|
|
15
|
+
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
|
|
16
|
+
permissions:
|
|
17
|
+
contents: read
|
|
18
|
+
pages: write
|
|
19
|
+
id-token: write
|
|
20
|
+
|
|
21
|
+
# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
|
|
22
|
+
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
|
|
23
|
+
concurrency:
|
|
24
|
+
group: "pages"
|
|
25
|
+
cancel-in-progress: false
|
|
26
|
+
|
|
27
|
+
jobs:
|
|
28
|
+
build:
|
|
29
|
+
runs-on: ubuntu-latest
|
|
30
|
+
|
|
31
|
+
steps:
|
|
32
|
+
- name: Checkout
|
|
33
|
+
uses: actions/checkout@v4
|
|
34
|
+
|
|
35
|
+
- name: Set up Python
|
|
36
|
+
uses: actions/setup-python@v4
|
|
37
|
+
with:
|
|
38
|
+
python-version: '3.10'
|
|
39
|
+
cache: 'pip' # Cache pip dependencies
|
|
40
|
+
|
|
41
|
+
- name: Install Pandoc
|
|
42
|
+
run: |
|
|
43
|
+
sudo apt-get update
|
|
44
|
+
sudo apt-get install -y pandoc
|
|
45
|
+
|
|
46
|
+
- name: Install dependencies
|
|
47
|
+
run: |
|
|
48
|
+
python -m pip install --upgrade pip
|
|
49
|
+
pip install -e .[docs,experiments]
|
|
50
|
+
|
|
51
|
+
- name: Build documentation
|
|
52
|
+
run: |
|
|
53
|
+
cd docs
|
|
54
|
+
make html
|
|
55
|
+
|
|
56
|
+
- name: Setup Pages
|
|
57
|
+
uses: actions/configure-pages@v4
|
|
58
|
+
if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
|
|
59
|
+
|
|
60
|
+
- name: Upload artifact
|
|
61
|
+
uses: actions/upload-pages-artifact@v3
|
|
62
|
+
if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
|
|
63
|
+
with:
|
|
64
|
+
path: './docs/build/html'
|
|
65
|
+
|
|
66
|
+
deploy:
|
|
67
|
+
environment:
|
|
68
|
+
name: github-pages
|
|
69
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
needs: build
|
|
72
|
+
if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
|
|
73
|
+
|
|
74
|
+
steps:
|
|
75
|
+
- name: Deploy to GitHub Pages
|
|
76
|
+
id: deployment
|
|
77
|
+
uses: actions/deploy-pages@v4
|
fcvopt-0.4.0/.gitignore
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Python compiled files
|
|
2
|
+
*.pyc
|
|
3
|
+
*.pyo
|
|
4
|
+
*.pyd
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.so
|
|
7
|
+
*.dylib
|
|
8
|
+
*.dll
|
|
9
|
+
|
|
10
|
+
# Distribution / packaging
|
|
11
|
+
build/
|
|
12
|
+
dist/
|
|
13
|
+
*.egg-info/
|
|
14
|
+
.eggs/
|
|
15
|
+
*.egg
|
|
16
|
+
|
|
17
|
+
# Virtual environments
|
|
18
|
+
fcvopt_test/
|
|
19
|
+
venv/
|
|
20
|
+
env/
|
|
21
|
+
ENV/
|
|
22
|
+
|
|
23
|
+
# IDEs
|
|
24
|
+
.vscode/
|
|
25
|
+
.idea/
|
|
26
|
+
*.swp
|
|
27
|
+
*.swo
|
|
28
|
+
*~
|
|
29
|
+
|
|
30
|
+
# OS files
|
|
31
|
+
.DS_Store
|
|
32
|
+
.DS_Store?
|
|
33
|
+
._*
|
|
34
|
+
.Spotlight-V100
|
|
35
|
+
.Trashes
|
|
36
|
+
Thumbs.db
|
|
37
|
+
|
|
38
|
+
# Project specific
|
|
39
|
+
test.py
|
|
40
|
+
*.pkl
|
|
41
|
+
*.pt
|
|
42
|
+
*.pth
|
|
43
|
+
*.csv
|
|
44
|
+
*/hp_opt_runs/*
|
|
45
|
+
docs/build/*
|
|
46
|
+
examples/**runs/*
|
|
47
|
+
|
|
48
|
+
# Quest/Slurm files
|
|
49
|
+
*.e*
|
|
50
|
+
*.o*
|
|
51
|
+
|
|
52
|
+
# MLflow
|
|
53
|
+
mlruns/
|
|
54
|
+
mlartifacts/
|
|
55
|
+
|
|
56
|
+
# Jupyter
|
|
57
|
+
.ipynb_checkpoints/
|
|
58
|
+
*.ipynb_checkpoints/
|
fcvopt-0.4.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
FCVOpt is a Python package (v0.4.0) for **Fractional Cross-Validation** in hyperparameter optimization. It implements efficient hyperparameter tuning by evaluating only a fraction of cross-validation folds using hierarchical Gaussian processes (HGP).
|
|
8
|
+
|
|
9
|
+
**Key Innovation**: Instead of evaluating all K folds for every hyperparameter configuration, FCVOpt uses a hierarchical GP model to exploit fold-wise correlations, enabling optimization with only 1-2 folds evaluated per configuration—drastically reducing computation.
|
|
10
|
+
|
|
11
|
+
## Installation & Setup
|
|
12
|
+
|
|
13
|
+
### Virtual Environment Setup
|
|
14
|
+
```bash
|
|
15
|
+
chmod +x venv_setup.sh
|
|
16
|
+
./venv_setup.sh
|
|
17
|
+
source fcvopt_test/bin/activate
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Development Installation
|
|
21
|
+
```bash
|
|
22
|
+
pip install -e . # Core package
|
|
23
|
+
pip install -e .[experiments] # + experiment dependencies
|
|
24
|
+
pip install -e .[docs] # + documentation dependencies
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Docker Setup
|
|
28
|
+
```bash
|
|
29
|
+
docker build -t fcvopt_test .
|
|
30
|
+
docker run -v $(pwd)/experiments:/app/experiments -it fcvopt_test
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Running Tests
|
|
34
|
+
|
|
35
|
+
Tests use Python's `unittest` framework:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
python -m unittest discover tests/ # All tests
|
|
39
|
+
python -m unittest tests/test_fcvopt.py # FCVOpt tests
|
|
40
|
+
python -m unittest tests/test_bayes_opt.py # BayesOpt tests
|
|
41
|
+
python -m unittest tests.test_fcvopt.TestFCVOpt.test_initialization # Single test
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Building Documentation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
cd docs/
|
|
48
|
+
make html # Build HTML documentation
|
|
49
|
+
make clean # Clean build artifacts
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Documentation: https://syerramilli.github.io/fcvopt/
|
|
53
|
+
|
|
54
|
+
## Architecture Overview
|
|
55
|
+
|
|
56
|
+
### Core Modules
|
|
57
|
+
|
|
58
|
+
1. **`fcvopt/crossvalidation/`** - Cross-validation objective functions
|
|
59
|
+
- `cvobjective.py`: Base `CVObjective` abstract class
|
|
60
|
+
- `sklearn_cvobj.py`: `SklearnCVObj` wrapper for scikit-learn estimators, `XGBoostCVObjEarlyStopping` for XGBoost with early stopping
|
|
61
|
+
- `mlp_cvobj.py`: `MLPCVObj` for neural networks via Skorch
|
|
62
|
+
- `resnet_cvobj.py`: `ResNetCVObj` for TabResNet on tabular data
|
|
63
|
+
- `optuna_obj.py`: Integration with Optuna optimizer
|
|
64
|
+
|
|
65
|
+
2. **`fcvopt/optimizers/`** - Bayesian optimization algorithms
|
|
66
|
+
- `bayes_opt.py`: `BayesOpt` class - standard BO with full K-fold CV
|
|
67
|
+
- `fcvopt.py`: `FCVOpt` class - fractional CV optimizer (main algorithm)
|
|
68
|
+
- `active_learning.py`: `ActiveLearning` class using posterior variance reduction
|
|
69
|
+
- `cvrandopt.py`: `CVRandOpt` random search baseline
|
|
70
|
+
- `mtbo_cv.py`: `MTBO_CV` multi-task BO variant
|
|
71
|
+
- `optimize_acq.py`: Acquisition function optimization utilities
|
|
72
|
+
|
|
73
|
+
3. **`fcvopt/models/`** - Gaussian Process models
|
|
74
|
+
- `gpregression.py`: `GPR` - base GP regression model
|
|
75
|
+
- `hmgp.py`: `HGP` - Hierarchical GP for modeling fold-wise correlations
|
|
76
|
+
- `multitaskgp.py`: Multi-task GP for multiple outputs
|
|
77
|
+
- `warp.py`: Input warping transformations
|
|
78
|
+
- `priors.py`: Custom GP priors (e.g., `BetaPrior`)
|
|
79
|
+
|
|
80
|
+
4. **`fcvopt/kernels/`** - Custom GP kernels
|
|
81
|
+
- `hamming_kernel.py`: `HammingKernel` for categorical fold indices
|
|
82
|
+
- `constant_kernel.py`: Constant kernel
|
|
83
|
+
- `matern.py`: Matérn kernel implementations
|
|
84
|
+
- `multitaskkernel.py`: Multi-task covariance functions
|
|
85
|
+
|
|
86
|
+
5. **`fcvopt/configspace.py`** - Extended `ConfigurationSpace` wrapper
|
|
87
|
+
- `latinhypercube_sample()` for initial design sampling
|
|
88
|
+
- Conversion between Configuration objects and numeric arrays
|
|
89
|
+
|
|
90
|
+
6. **`fcvopt/fit/mll_scipy.py`** - GP hyperparameter optimization via scipy L-BFGS-B
|
|
91
|
+
|
|
92
|
+
7. **`fcvopt/util/samplers.py`** - Sampling utilities (Latin Hypercube, stratified)
|
|
93
|
+
|
|
94
|
+
### Key Design Patterns
|
|
95
|
+
|
|
96
|
+
**CVObjective Interface**: All CV objectives inherit from `CVObjective` and must implement:
|
|
97
|
+
- `construct_model(params)`: Return unfitted model instance
|
|
98
|
+
- `fit_and_test(params, train_index, test_index)`: Train and evaluate on one fold
|
|
99
|
+
|
|
100
|
+
**Optimizer Hierarchy**: `FCVOpt` extends `BayesOpt`, overriding:
|
|
101
|
+
- `_initialize()`: Assigns folds to initial random configurations
|
|
102
|
+
- `_acquisition()`: Adds fold selection after candidate selection
|
|
103
|
+
- `_construct_model()`: Uses `HGP` instead of standard GP
|
|
104
|
+
- `_select_fold_indices()`: Chooses folds via variance reduction or random sampling
|
|
105
|
+
|
|
106
|
+
**MLflow Integration**: Both `BayesOpt` and `FCVOpt` automatically track:
|
|
107
|
+
- Hyperparameter configurations and losses
|
|
108
|
+
- Model checkpoints (GP state)
|
|
109
|
+
- Iteration snapshots with acquisition values
|
|
110
|
+
|
|
111
|
+
### Data Flow in FCVOpt
|
|
112
|
+
|
|
113
|
+
1. **Initialization**: Sample `n_init` random configs, assign folds (random/stratified), evaluate
|
|
114
|
+
2. **Model Fitting**: Fit hierarchical GP on `(x, fold_idx) → y` observations
|
|
115
|
+
3. **Acquisition**: Optimize acquisition function to select next configuration
|
|
116
|
+
4. **Fold Selection**: Choose fold via variance reduction or random
|
|
117
|
+
5. **Evaluation**: Evaluate objective on selected (config, fold) pair
|
|
118
|
+
6. **Repeat**: Update GP, select next candidate
|
|
119
|
+
|
|
120
|
+
## Example Notebooks
|
|
121
|
+
|
|
122
|
+
Located in `examples/`:
|
|
123
|
+
- `01_Introduction_to_FCVOpt.ipynb` - Basic usage and concepts
|
|
124
|
+
- `02_Tuning_Lightgbm_Sklearn_API.ipynb` - LightGBM hyperparameter tuning
|
|
125
|
+
- `03_Extending_CVobjective.ipynb` - Creating custom CV objectives
|
|
126
|
+
- `04_Tuning_TabularResNet.ipynb` - TabResNet neural network tuning
|
|
127
|
+
|
|
128
|
+
## Experiments
|
|
129
|
+
|
|
130
|
+
Experiments for reproducing paper results are in `experiments/`:
|
|
131
|
+
- `rf_high_dim/`: Random forest on high-dimensional data
|
|
132
|
+
- `xgb_class/`: XGBoost classification
|
|
133
|
+
- `boosted_reg/`: Gradient boosted regression
|
|
134
|
+
- `mlp/`: Multi-layer perceptron via PyTorch
|
|
135
|
+
- `tab_resnet/`: Tabular ResNet
|
|
136
|
+
|
|
137
|
+
Utility scripts:
|
|
138
|
+
- `generate_figures.py`: Create paper figures
|
|
139
|
+
- `reproduce_rf.sh`: Bash script to run RF experiments
|
|
140
|
+
|
|
141
|
+
## Dependencies
|
|
142
|
+
|
|
143
|
+
**Core**: PyTorch (>=2.3.1), GPyTorch (>=1.14), BoTorch (>=0.12), ConfigSpace (>=1.0, <2.0), MLflow (>=2.10), scikit-learn (>=1.4.2), XGBoost (>=2.0, <3), Skorch (>=0.15)
|
|
144
|
+
|
|
145
|
+
**Note on SMAC**: Experiments using SMAC require `pyrfr`, which needs a C++ compiler and `swig`. This is optional.
|
|
146
|
+
|
|
147
|
+
## Common Gotchas
|
|
148
|
+
|
|
149
|
+
- **Categorical hyperparameters**: `latinhypercube_sample()` only supports binary-valued categoricals. Use `ConfigSpace.sample_configuration()` for general categoricals.
|
|
150
|
+
|
|
151
|
+
- **MLflow tracking**: Optimizers auto-initialize MLflow. To use custom tracking URI, set before creating optimizer or pass `tracking_uri` parameter.
|
|
152
|
+
|
|
153
|
+
- **Fold indices**: In `FCVOpt`, fold indices include repeats: with `n_folds=5` and `n_repeats=2`, valid fold indices are 0-9.
|
|
154
|
+
|
|
155
|
+
- **EI acquisition**: Not supported for `FCVOpt` - use `'LCB'` or `'KG'` instead.
|
|
156
|
+
|
|
157
|
+
- **LightGBM compatibility**: A fix has been applied in `fcvopt/fit/mll_scipy.py` to handle LightGBM's C++ objects. When `n_jobs=1` (default), only a single restart is used. When `n_jobs>1`, `ThreadPoolExecutor` is used instead of `joblib.Parallel` to avoid pickling issues. If you experience segfaults, clear `__pycache__` and reinstall: `pip install -e .`
|
|
158
|
+
|
|
159
|
+
## Quick Usage Example
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
from fcvopt.optimizers import FCVOpt
|
|
163
|
+
from fcvopt.crossvalidation import SklearnCVObj
|
|
164
|
+
from fcvopt.configspace import ConfigurationSpace
|
|
165
|
+
from ConfigSpace import Float
|
|
166
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
167
|
+
from sklearn.metrics import log_loss
|
|
168
|
+
|
|
169
|
+
# Define hyperparameter space
|
|
170
|
+
config = ConfigurationSpace(seed=42)
|
|
171
|
+
config.add(Float('max_features', bounds=(0.1, 1.0)))
|
|
172
|
+
config.add(Float('min_samples_leaf', bounds=(0.01, 0.1)))
|
|
173
|
+
|
|
174
|
+
# Create CV objective
|
|
175
|
+
cv_obj = SklearnCVObj(
|
|
176
|
+
estimator=RandomForestClassifier(n_estimators=100),
|
|
177
|
+
X=X_train, y=y_train,
|
|
178
|
+
task='classification',
|
|
179
|
+
loss_metric=log_loss,
|
|
180
|
+
n_splits=5
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Run fractional CV optimization
|
|
184
|
+
optimizer = FCVOpt(obj=cv_obj, config=config, n_folds=5)
|
|
185
|
+
best_config = optimizer.run(n_iter=20, n_init=5)
|
|
186
|
+
```
|
fcvopt-0.4.0/Dockerfile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
FROM python:3.10
|
|
2
|
+
|
|
3
|
+
# Set the working directory in the container
|
|
4
|
+
WORKDIR /app
|
|
5
|
+
|
|
6
|
+
# Copy the current directory contents into the container at /app
|
|
7
|
+
# Note: The .dockerignore file is used to exclude certain files and directories
|
|
8
|
+
# from being copied. This includes the experiments directory which is not needed
|
|
9
|
+
# for installing the library. It is recommended to mount the experiments directory
|
|
10
|
+
# as a volume when running the container.
|
|
11
|
+
COPY . /app
|
|
12
|
+
|
|
13
|
+
# Upgrade pip
|
|
14
|
+
RUN pip install --upgrade pip
|
|
15
|
+
|
|
16
|
+
# Install the CPU only version of PyTorch (the index-url must be specified for Linux distributions)
|
|
17
|
+
RUN pip install torch==2.2.0 --index-url https://download.pytorch.org/whl/cpu
|
|
18
|
+
|
|
19
|
+
# Install the fcvopt library along with required dependencies
|
|
20
|
+
# and the extra dependencies for the experiments
|
|
21
|
+
RUN pip install .[experiments]
|
|
22
|
+
|
|
23
|
+
# Set the default command to run when the container starts
|
|
24
|
+
ENTRYPOINT ["/bin/bash"]
|
fcvopt-0.4.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Suraj Yerramilli
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
fcvopt-0.4.0/PKG-INFO
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fcvopt
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Fractional K-fold cross-validation for hyperparameter optimization
|
|
5
|
+
Project-URL: Homepage, https://github.com/syerramilli/fcvopt
|
|
6
|
+
Project-URL: Documentation, https://syerramilli.github.io/fcvopt/
|
|
7
|
+
Project-URL: Repository, https://github.com/syerramilli/fcvopt.git
|
|
8
|
+
Project-URL: Issues, https://github.com/syerramilli/fcvopt/issues
|
|
9
|
+
Author: Daniel W. Apley
|
|
10
|
+
Author-email: Suraj Yerramilli <surajyerramilli@gmail.com>
|
|
11
|
+
Maintainer-email: Suraj Yerramilli <surajyerramilli@gmail.com>
|
|
12
|
+
License-Expression: MIT
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Keywords: automl,bayesian-optimization,cross-validation,gaussian-processes,hyperparameter-optimization,machine-learning
|
|
15
|
+
Classifier: Development Status :: 4 - Beta
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: Intended Audience :: Science/Research
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
26
|
+
Requires-Python: >=3.10
|
|
27
|
+
Requires-Dist: botorch>=0.12
|
|
28
|
+
Requires-Dist: configspace<2.0,>=1.0
|
|
29
|
+
Requires-Dist: gpytorch>=1.14
|
|
30
|
+
Requires-Dist: joblib>=1.3
|
|
31
|
+
Requires-Dist: mlflow>=2.10
|
|
32
|
+
Requires-Dist: numpy>=2.0
|
|
33
|
+
Requires-Dist: pandas>=2.2.2
|
|
34
|
+
Requires-Dist: scikit-learn>=1.4.2
|
|
35
|
+
Requires-Dist: scipy>=1.12
|
|
36
|
+
Requires-Dist: skorch>=0.15
|
|
37
|
+
Requires-Dist: torch>=2.3.1
|
|
38
|
+
Requires-Dist: xgboost<3,>=2.0.0
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: black; extra == 'dev'
|
|
41
|
+
Requires-Dist: flake8; extra == 'dev'
|
|
42
|
+
Requires-Dist: isort; extra == 'dev'
|
|
43
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
44
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
45
|
+
Provides-Extra: docs
|
|
46
|
+
Requires-Dist: ipykernel; extra == 'docs'
|
|
47
|
+
Requires-Dist: nbsphinx; extra == 'docs'
|
|
48
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == 'docs'
|
|
49
|
+
Requires-Dist: sphinx-rtd-theme; extra == 'docs'
|
|
50
|
+
Requires-Dist: sphinx>=4.0; extra == 'docs'
|
|
51
|
+
Requires-Dist: sphinxcontrib-napoleon; extra == 'docs'
|
|
52
|
+
Provides-Extra: experiments
|
|
53
|
+
Requires-Dist: lightgbm>=4.0; extra == 'experiments'
|
|
54
|
+
Requires-Dist: matplotlib>=3.7.0; extra == 'experiments'
|
|
55
|
+
Requires-Dist: optuna<4.0.0,>=3.6.0; extra == 'experiments'
|
|
56
|
+
Requires-Dist: seaborn>=0.12.2; extra == 'experiments'
|
|
57
|
+
Requires-Dist: smac<3.0,>=2.0; extra == 'experiments'
|
|
58
|
+
Description-Content-Type: text/markdown
|
|
59
|
+
|
|
60
|
+
# fcvopt: Fractional cross-validation for hyperparameter optimization
|
|
61
|
+
|
|
62
|
+
FCVOpt is a Python package for "Fractional Cross-Validation" in hyperparameter optimization. It implements efficient hyperparameter tuning by evaluating only a fraction of cross-validation folds using hierarchical Gaussian processes.
|
|
63
|
+
|
|
64
|
+
The documentation is available at [https://syerramilli.github.io/fcvopt/](https://syerramilli.github.io/fcvopt/).
|
|
65
|
+
|
|
66
|
+
🚀 **Key Features**:
|
|
67
|
+
|
|
68
|
+
* **Efficient Optimization**: Evaluate hyperparameters using only a subset of CV folds
|
|
69
|
+
* **Hierarchical Gaussian Processes**: Model fold-wise correlations for better predictions
|
|
70
|
+
* **MLflow Integration**: Automatic experiment tracking and model versioning
|
|
71
|
+
* **Framework Support**: Scikit-learn, XGBoost, PyTorch (via Skorch), and more
|
|
72
|
+
|
|
73
|
+
## Installation
|
|
74
|
+
|
|
75
|
+
### From Source
|
|
76
|
+
|
|
77
|
+
```{bash}
|
|
78
|
+
git clone https://github.com/syerramilli/fcvopt.git
|
|
79
|
+
cd fcvopt
|
|
80
|
+
pip install .
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**With optional dependencies**:
|
|
84
|
+
|
|
85
|
+
```{bash}
|
|
86
|
+
pip install .[experiments] # For reproducing the results from the paper
|
|
87
|
+
pip install .[docs] # For building documentation
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Citing
|
|
91
|
+
If you use this code in your research, please cite the following paper:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
@article{yerramilli2025fractional,
|
|
95
|
+
author = {Suraj Yerramilli and Daniel W. Apley},
|
|
96
|
+
title = {Fractional Cross-Validation for Optimizing Hyperparameters of Supervised Learning Algorithms},
|
|
97
|
+
journal = {Technometrics},
|
|
98
|
+
year = {2025},
|
|
99
|
+
doi = {10.1080/00401706.2025.2515926},
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Reproducing the experiment results from the paper
|
|
104
|
+
|
|
105
|
+
The experiments are all contained in the `experiments` folder. Each subdirectory within this folder contains the script files for running one of the case studies in the paper. Refer to the README file within each of the subdirectories for instructions to run the files.
|
|
106
|
+
|
|
107
|
+
For reproducibility, we provide two options for setting up the environment to run the experiments: a virtual environment using `venv` and a Docker container.
|
|
108
|
+
|
|
109
|
+
### Setting up a virtual environment
|
|
110
|
+
|
|
111
|
+
The bash script file `venv_setup.sh` can be used to create a virtual environment and install the required packages. Ensure you have Python >= 3.10 and <= 3.12 installed (the package itself requires Python >= 3.10).
|
|
112
|
+
|
|
113
|
+
To run the script, use the following commands:
|
|
114
|
+
|
|
115
|
+
```{bash}
|
|
116
|
+
chmod +x venv_setup.sh
|
|
117
|
+
./venv_setup.sh
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**Note:**
|
|
121
|
+
The experiments involving the SMAC algorithm require the `smac` library, which in turn requires building and compiling the `pyrfr` package. While the main functions of `fcvopt` do not depend on `pyrfr`, you might encounter build issues during its installation if you do not have a C++ compiler and the `swig` binary installed on your system.
|
|
122
|
+
|
|
123
|
+
### Setting up a Docker container
|
|
124
|
+
|
|
125
|
+
The Dockerfile is provided to run the experiments in a container with the `fcvopt` package and all the required dependencies. The Dockerfile is based on the Python 3.10 Debian image. To build the image, run the following command:
|
|
126
|
+
|
|
127
|
+
```{bash}
|
|
128
|
+
docker build -t fcvopt_test .
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
To run the container with the files in the `experiments` folder mounted, run the following command:
|
|
132
|
+
|
|
133
|
+
```{bash}
|
|
134
|
+
docker run -v <path_to_experiments_folder>:/app/experiments -it fcvopt_test
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
This will launch the container and open a bash shell. The experiments directory will be mounted in the container at `/app/experiments`. Mounting the directory allows you to access the files in the experiments folder from within the container, and any changes made to the files will be reflected in your local directory. Replace `<path_to_experiments_folder>` with the **absolute path** to your local experiments directory. Relative paths will not work, as the container will not have access to your local file system. On Linux and macOS, you can use the `$(pwd)` command to get the absolute path of the current directory. For example:
|
|
138
|
+
|
|
139
|
+
```{bash}
|
|
140
|
+
docker run -v $(pwd)/experiments:/app/experiments -it fcvopt_test
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Once inside the container, you can navigate to the /app/experiments directory and run the experiments as needed. For example:
|
|
144
|
+
|
|
145
|
+
```{bash}
|
|
146
|
+
cd experiments
|
|
147
|
+
bash reproduce_rf.sh
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
**Note:** On Ubuntu/Debian, you may need administrative privileges to run the Docker commands. You can do this by adding `sudo` before the command.
|
fcvopt-0.4.0/README.md
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# fcvopt: Fractional cross-validation for hyperparameter optimization
|
|
2
|
+
|
|
3
|
+
FCVOpt is a Python package for "Fractional Cross-Validation" in hyperparameter optimization. It implements efficient hyperparameter tuning by evaluating only a fraction of cross-validation folds using hierarchical Gaussian processes.
|
|
4
|
+
|
|
5
|
+
The documentation is available at [https://syerramilli.github.io/fcvopt/](https://syerramilli.github.io/fcvopt/).
|
|
6
|
+
|
|
7
|
+
🚀 **Key Features**:
|
|
8
|
+
|
|
9
|
+
* **Efficient Optimization**: Evaluate hyperparameters using only a subset of CV folds
|
|
10
|
+
* **Hierarchical Gaussian Processes**: Model fold-wise correlations for better predictions
|
|
11
|
+
* **MLflow Integration**: Automatic experiment tracking and model versioning
|
|
12
|
+
* **Framework Support**: Scikit-learn, XGBoost, PyTorch (via Skorch), and more
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
### From Source
|
|
17
|
+
|
|
18
|
+
```{bash}
|
|
19
|
+
git clone https://github.com/syerramilli/fcvopt.git
|
|
20
|
+
cd fcvopt
|
|
21
|
+
pip install .
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
**With optional dependencies**:
|
|
25
|
+
|
|
26
|
+
```{bash}
|
|
27
|
+
pip install .[experiments] # For reproducing the results from the paper
|
|
28
|
+
pip install .[docs] # For building documentation
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Citing
|
|
32
|
+
If you use this code in your research, please cite the following paper:
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
@article{yerramilli2025fractional,
|
|
36
|
+
author = {Suraj Yerramilli and Daniel W. Apley},
|
|
37
|
+
title = {Fractional Cross-Validation for Optimizing Hyperparameters of Supervised Learning Algorithms},
|
|
38
|
+
journal = {Technometrics},
|
|
39
|
+
year = {2025},
|
|
40
|
+
doi = {10.1080/00401706.2025.2515926},
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Reproducing the experiment results from the paper
|
|
45
|
+
|
|
46
|
+
The experiments are all contained in the `experiments` folder. Each subdirectory within this folder contains the script files for running one of the case studies in the paper. Refer to the README file within each of the subdirectories for instructions to run the files.
|
|
47
|
+
|
|
48
|
+
For reproducibility, we provide two options for setting up the environment to run the experiments: a virtual environment using `venv` and a Docker container.
|
|
49
|
+
|
|
50
|
+
### Setting up a virtual environment
|
|
51
|
+
|
|
52
|
+
The bash script file `venv_setup.sh` can be used to create a virtual environment and install the required packages. Ensure you have Python >= 3.10 and <= 3.12 installed (the package itself requires Python >= 3.10).
|
|
53
|
+
|
|
54
|
+
To run the script, use the following commands:
|
|
55
|
+
|
|
56
|
+
```{bash}
|
|
57
|
+
chmod +x venv_setup.sh
|
|
58
|
+
./venv_setup.sh
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Note:**
|
|
62
|
+
The experiments involving the SMAC algorithm require the `smac` library, which in turn requires building and compiling the `pyrfr` package. While the main functions of `fcvopt` do not depend on `pyrfr`, you might encounter build issues during its installation if you do not have a C++ compiler and the `swig` binary installed on your system.
|
|
63
|
+
|
|
64
|
+
### Setting up a Docker container
|
|
65
|
+
|
|
66
|
+
The Dockerfile is provided to run the experiments in a container with the `fcvopt` package and all the required dependencies. The Dockerfile is based on the Python 3.10 Debian image. To build the image, run the following command:
|
|
67
|
+
|
|
68
|
+
```{bash}
|
|
69
|
+
docker build -t fcvopt_test .
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
To run the container with the files in the `experiments` folder mounted, run the following command:
|
|
73
|
+
|
|
74
|
+
```{bash}
|
|
75
|
+
docker run -v <path_to_experiments_folder>:/app/experiments -it fcvopt_test
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
This will launch the container and open a bash shell. The experiments directory will be mounted in the container at `/app/experiments`. Mounting the directory allows you to access the files in the experiments folder from within the container, and any changes made to the files will be reflected in your local directory. Replace `<path_to_experiments_folder>` with the **absolute path** to your local experiments directory. Relative paths will not work, as the container will not have access to your local file system. On Linux and macOS, you can use the `$(pwd)` command to get the absolute path of the current directory. For example:
|
|
79
|
+
|
|
80
|
+
```{bash}
|
|
81
|
+
docker run -v $(pwd)/experiments:/app/experiments -it fcvopt_test
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Once inside the container, you can navigate to the /app/experiments directory and run the experiments as needed. For example:
|
|
85
|
+
|
|
86
|
+
```{bash}
|
|
87
|
+
cd experiments
|
|
88
|
+
bash reproduce_rf.sh
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Note:** On Ubuntu/Debian, you may need administrative privileges to run the Docker commands. You can do this by adding `sudo` before the command.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Minimal makefile for Sphinx documentation
|
|
2
|
+
#
|
|
3
|
+
|
|
4
|
+
# You can set these variables from the command line, and also
|
|
5
|
+
# from the environment for the first two.
|
|
6
|
+
SPHINXOPTS ?=
|
|
7
|
+
SPHINXBUILD ?= sphinx-build
|
|
8
|
+
SOURCEDIR = source
|
|
9
|
+
BUILDDIR = build
|
|
10
|
+
|
|
11
|
+
# Put it first so that "make" without argument is like "make help".
|
|
12
|
+
help:
|
|
13
|
+
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
|
14
|
+
|
|
15
|
+
.PHONY: help Makefile
|
|
16
|
+
|
|
17
|
+
# Catch-all target: route all unknown targets to Sphinx using the new
|
|
18
|
+
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
|
19
|
+
%: Makefile
|
|
20
|
+
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|