fcvopt 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. fcvopt-0.4.0/.claude/settings.local.json +20 -0
  2. fcvopt-0.4.0/.dockerignore +6 -0
  3. fcvopt-0.4.0/.github/workflows/docs.yml +77 -0
  4. fcvopt-0.4.0/.gitignore +58 -0
  5. fcvopt-0.4.0/CLAUDE.md +186 -0
  6. fcvopt-0.4.0/Dockerfile +24 -0
  7. fcvopt-0.4.0/LICENSE +21 -0
  8. fcvopt-0.4.0/PKG-INFO +150 -0
  9. fcvopt-0.4.0/README.md +91 -0
  10. fcvopt-0.4.0/docs/Makefile +20 -0
  11. fcvopt-0.4.0/docs/make.bat +35 -0
  12. fcvopt-0.4.0/docs/requirements.txt +4 -0
  13. fcvopt-0.4.0/docs/source/conf.py +93 -0
  14. fcvopt-0.4.0/docs/source/configspace.rst +30 -0
  15. fcvopt-0.4.0/docs/source/crossvalidation.rst +79 -0
  16. fcvopt-0.4.0/docs/source/examples/01_Introduction_to_FCVOpt.ipynb +361 -0
  17. fcvopt-0.4.0/docs/source/examples/02_Tuning_Lightgbm_Sklearn_API.ipynb +324 -0
  18. fcvopt-0.4.0/docs/source/examples/03_Extending_CVobjective.ipynb +401 -0
  19. fcvopt-0.4.0/docs/source/examples/04_Standard_BO.ipynb +281 -0
  20. fcvopt-0.4.0/docs/source/examples/04_Tuning_TabularResNet.ipynb +386 -0
  21. fcvopt-0.4.0/docs/source/examples/index.rst +13 -0
  22. fcvopt-0.4.0/docs/source/examples.rst +344 -0
  23. fcvopt-0.4.0/docs/source/index.rst +117 -0
  24. fcvopt-0.4.0/docs/source/mlflow_integration.rst +147 -0
  25. fcvopt-0.4.0/docs/source/models.rst +96 -0
  26. fcvopt-0.4.0/docs/source/optimizers.rst +36 -0
  27. fcvopt-0.4.0/docs/source/technical_details.rst +139 -0
  28. fcvopt-0.4.0/examples/01_Introduction_to_FCVOpt.ipynb +361 -0
  29. fcvopt-0.4.0/examples/02_Tuning_Lightgbm_Sklearn_API.ipynb +324 -0
  30. fcvopt-0.4.0/examples/03_Extending_CVobjective.ipynb +401 -0
  31. fcvopt-0.4.0/examples/04_Standard_BO.ipynb +281 -0
  32. fcvopt-0.4.0/examples/04_Tuning_TabularResNet.ipynb +386 -0
  33. fcvopt-0.4.0/experiments/README.md +22 -0
  34. fcvopt-0.4.0/experiments/boosted_reg/README.md +38 -0
  35. fcvopt-0.4.0/experiments/boosted_reg/run_xgb_fcvopt.py +100 -0
  36. fcvopt-0.4.0/experiments/figures_auto/fig-rf-madelon.png +0 -0
  37. fcvopt-0.4.0/experiments/gen_torchscript_models.py +51 -0
  38. fcvopt-0.4.0/experiments/generate_figures.py +297 -0
  39. fcvopt-0.4.0/experiments/mlp/README.md +71 -0
  40. fcvopt-0.4.0/experiments/mlp/run_fcvopt.py +137 -0
  41. fcvopt-0.4.0/experiments/mlp/run_optuna.py +117 -0
  42. fcvopt-0.4.0/experiments/mlp/run_smac.py +124 -0
  43. fcvopt-0.4.0/experiments/mlp/true_eval.py +229 -0
  44. fcvopt-0.4.0/experiments/reproduce_rf.sh +55 -0
  45. fcvopt-0.4.0/experiments/rf_high_dim/README.md +71 -0
  46. fcvopt-0.4.0/experiments/rf_high_dim/config.py +21 -0
  47. fcvopt-0.4.0/experiments/rf_high_dim/run_fcvopt.py +124 -0
  48. fcvopt-0.4.0/experiments/rf_high_dim/run_optuna.py +117 -0
  49. fcvopt-0.4.0/experiments/rf_high_dim/run_smac.py +117 -0
  50. fcvopt-0.4.0/experiments/rf_high_dim/true_eval.py +188 -0
  51. fcvopt-0.4.0/experiments/tab_resnet/run_fcvopt.py +131 -0
  52. fcvopt-0.4.0/experiments/tab_resnet/run_optuna.py +120 -0
  53. fcvopt-0.4.0/experiments/tab_resnet/run_smac.py +118 -0
  54. fcvopt-0.4.0/experiments/tab_resnet/true_eval.py +234 -0
  55. fcvopt-0.4.0/experiments/xgb_class/README.md +59 -0
  56. fcvopt-0.4.0/experiments/xgb_class/run_xgb_fcvopt.py +132 -0
  57. fcvopt-0.4.0/experiments/xgb_class/run_xgb_optuna.py +123 -0
  58. fcvopt-0.4.0/experiments/xgb_class/run_xgb_smac.py +114 -0
  59. fcvopt-0.4.0/experiments/xgb_class/true_eval.py +219 -0
  60. fcvopt-0.4.0/fcvopt/__init__.py +19 -0
  61. fcvopt-0.4.0/fcvopt/configspace.py +122 -0
  62. fcvopt-0.4.0/fcvopt/crossvalidation/__init__.py +4 -0
  63. fcvopt-0.4.0/fcvopt/crossvalidation/cvobjective.py +205 -0
  64. fcvopt-0.4.0/fcvopt/crossvalidation/mlp_cvobj.py +339 -0
  65. fcvopt-0.4.0/fcvopt/crossvalidation/optuna_obj.py +57 -0
  66. fcvopt-0.4.0/fcvopt/crossvalidation/resnet_cvobj.py +312 -0
  67. fcvopt-0.4.0/fcvopt/crossvalidation/sklearn_cvobj.py +295 -0
  68. fcvopt-0.4.0/fcvopt/fit/__init__.py +1 -0
  69. fcvopt-0.4.0/fcvopt/fit/mll_scipy.py +287 -0
  70. fcvopt-0.4.0/fcvopt/kernels/__init__.py +5 -0
  71. fcvopt-0.4.0/fcvopt/kernels/constant_kernel.py +11 -0
  72. fcvopt-0.4.0/fcvopt/kernels/hamming_kernel.py +75 -0
  73. fcvopt-0.4.0/fcvopt/kernels/matern.py +8 -0
  74. fcvopt-0.4.0/fcvopt/kernels/multitaskkernel.py +78 -0
  75. fcvopt-0.4.0/fcvopt/models/__init__.py +2 -0
  76. fcvopt-0.4.0/fcvopt/models/gpregression.py +119 -0
  77. fcvopt-0.4.0/fcvopt/models/hmgp.py +269 -0
  78. fcvopt-0.4.0/fcvopt/models/multitaskgp.py +125 -0
  79. fcvopt-0.4.0/fcvopt/models/priors.py +52 -0
  80. fcvopt-0.4.0/fcvopt/models/warp.py +100 -0
  81. fcvopt-0.4.0/fcvopt/optimizers/__init__.py +2 -0
  82. fcvopt-0.4.0/fcvopt/optimizers/active_learning.py +204 -0
  83. fcvopt-0.4.0/fcvopt/optimizers/bayes_opt.py +1379 -0
  84. fcvopt-0.4.0/fcvopt/optimizers/cvrandopt.py +40 -0
  85. fcvopt-0.4.0/fcvopt/optimizers/fcvopt.py +478 -0
  86. fcvopt-0.4.0/fcvopt/optimizers/mtbo_cv.py +231 -0
  87. fcvopt-0.4.0/fcvopt/optimizers/optimize_acq.py +103 -0
  88. fcvopt-0.4.0/fcvopt/py.typed +2 -0
  89. fcvopt-0.4.0/fcvopt/util/__init__.py +1 -0
  90. fcvopt-0.4.0/fcvopt/util/samplers.py +46 -0
  91. fcvopt-0.4.0/pyproject.toml +100 -0
  92. fcvopt-0.4.0/tests/test_bayes_opt.py +671 -0
  93. fcvopt-0.4.0/tests/test_fcvopt.py +923 -0
  94. fcvopt-0.4.0/venv_setup.sh +19 -0
@@ -0,0 +1,20 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(source:*)",
5
+ "Bash(python3:*)",
6
+ "Bash(bash:*)",
7
+ "Bash(tree:*)",
8
+ "Bash(pip install:*)",
9
+ "Bash(python -m build:*)",
10
+ "Bash(twine check:*)",
11
+ "WebSearch",
12
+ "WebFetch(domain:pypi.org)",
13
+ "Bash(python -m unittest:*)",
14
+ "Bash(git rm:*)",
15
+ "Bash(find:*)"
16
+ ],
17
+ "deny": [],
18
+ "ask": []
19
+ }
20
+ }
@@ -0,0 +1,6 @@
1
+ # Exclude directory
2
+ notebooks/
3
+ examples/
4
+ .vscode/
5
+ experiments/
6
+ tests/
@@ -0,0 +1,77 @@
1
+ name: Build and Deploy Documentation
2
+
3
+ on:
4
+ push:
5
+ branches: [ master, main ]
6
+ paths:
7
+ - 'docs/**'
8
+ - '.github/workflows/docs.yml'
9
+ pull_request:
10
+ branches: [ master, main ]
11
+ paths:
12
+ - 'docs/source/**'
13
+ - '.github/workflows/docs.yml'
14
+
15
+ # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
16
+ permissions:
17
+ contents: read
18
+ pages: write
19
+ id-token: write
20
+
21
+ # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
22
+ # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
23
+ concurrency:
24
+ group: "pages"
25
+ cancel-in-progress: false
26
+
27
+ jobs:
28
+ build:
29
+ runs-on: ubuntu-latest
30
+
31
+ steps:
32
+ - name: Checkout
33
+ uses: actions/checkout@v4
34
+
35
+ - name: Set up Python
36
+ uses: actions/setup-python@v4
37
+ with:
38
+ python-version: '3.10'
39
+ cache: 'pip' # Cache pip dependencies
40
+
41
+ - name: Install Pandoc
42
+ run: |
43
+ sudo apt-get update
44
+ sudo apt-get install -y pandoc
45
+
46
+ - name: Install dependencies
47
+ run: |
48
+ python -m pip install --upgrade pip
49
+ pip install -e .[docs,experiments]
50
+
51
+ - name: Build documentation
52
+ run: |
53
+ cd docs
54
+ make html
55
+
56
+ - name: Setup Pages
57
+ uses: actions/configure-pages@v4
58
+ if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
59
+
60
+ - name: Upload artifact
61
+ uses: actions/upload-pages-artifact@v3
62
+ if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
63
+ with:
64
+ path: './docs/build/html'
65
+
66
+ deploy:
67
+ environment:
68
+ name: github-pages
69
+ url: ${{ steps.deployment.outputs.page_url }}
70
+ runs-on: ubuntu-latest
71
+ needs: build
72
+ if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
73
+
74
+ steps:
75
+ - name: Deploy to GitHub Pages
76
+ id: deployment
77
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,58 @@
1
+ # Python compiled files
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ __pycache__/
6
+ *.so
7
+ *.dylib
8
+ *.dll
9
+
10
+ # Distribution / packaging
11
+ build/
12
+ dist/
13
+ *.egg-info/
14
+ .eggs/
15
+ *.egg
16
+
17
+ # Virtual environments
18
+ fcvopt_test/
19
+ venv/
20
+ env/
21
+ ENV/
22
+
23
+ # IDEs
24
+ .vscode/
25
+ .idea/
26
+ *.swp
27
+ *.swo
28
+ *~
29
+
30
+ # OS files
31
+ .DS_Store
32
+ .DS_Store?
33
+ ._*
34
+ .Spotlight-V100
35
+ .Trashes
36
+ Thumbs.db
37
+
38
+ # Project specific
39
+ test.py
40
+ *.pkl
41
+ *.pt
42
+ *.pth
43
+ *.csv
44
+ */hp_opt_runs/*
45
+ docs/build/*
46
+ examples/**runs/*
47
+
48
+ # Quest/Slurm files
49
+ *.e*
50
+ *.o*
51
+
52
+ # MLflow
53
+ mlruns/
54
+ mlartifacts/
55
+
56
+ # Jupyter
57
+ .ipynb_checkpoints/
58
+ *.ipynb_checkpoints/
fcvopt-0.4.0/CLAUDE.md ADDED
@@ -0,0 +1,186 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ FCVOpt is a Python package (v0.4.0) for **Fractional Cross-Validation** in hyperparameter optimization. It implements efficient hyperparameter tuning by evaluating only a fraction of cross-validation folds using hierarchical Gaussian processes (HGP).
8
+
9
+ **Key Innovation**: Instead of evaluating all K folds for every hyperparameter configuration, FCVOpt uses a hierarchical GP model to exploit fold-wise correlations, enabling optimization with only 1-2 folds evaluated per configuration—drastically reducing computation.
10
+
11
+ ## Installation & Setup
12
+
13
+ ### Virtual Environment Setup
14
+ ```bash
15
+ chmod +x venv_setup.sh
16
+ ./venv_setup.sh
17
+ source fcvopt_test/bin/activate
18
+ ```
19
+
20
+ ### Development Installation
21
+ ```bash
22
+ pip install -e . # Core package
23
+ pip install -e .[experiments] # + experiment dependencies
24
+ pip install -e .[docs] # + documentation dependencies
25
+ ```
26
+
27
+ ### Docker Setup
28
+ ```bash
29
+ docker build -t fcvopt_test .
30
+ docker run -v $(pwd)/experiments:/app/experiments -it fcvopt_test
31
+ ```
32
+
33
+ ## Running Tests
34
+
35
+ Tests use Python's `unittest` framework:
36
+
37
+ ```bash
38
+ python -m unittest discover tests/ # All tests
39
+ python -m unittest tests/test_fcvopt.py # FCVOpt tests
40
+ python -m unittest tests/test_bayes_opt.py # BayesOpt tests
41
+ python -m unittest tests.test_fcvopt.TestFCVOpt.test_initialization # Single test
42
+ ```
43
+
44
+ ## Building Documentation
45
+
46
+ ```bash
47
+ cd docs/
48
+ make html # Build HTML documentation
49
+ make clean # Clean build artifacts
50
+ ```
51
+
52
+ Documentation: https://syerramilli.github.io/fcvopt/
53
+
54
+ ## Architecture Overview
55
+
56
+ ### Core Modules
57
+
58
+ 1. **`fcvopt/crossvalidation/`** - Cross-validation objective functions
59
+ - `cvobjective.py`: Base `CVObjective` abstract class
60
+ - `sklearn_cvobj.py`: `SklearnCVObj` wrapper for scikit-learn estimators, `XGBoostCVObjEarlyStopping` for XGBoost with early stopping
61
+ - `mlp_cvobj.py`: `MLPCVObj` for neural networks via Skorch
62
+ - `resnet_cvobj.py`: `ResNetCVObj` for TabResNet on tabular data
63
+ - `optuna_obj.py`: Integration with Optuna optimizer
64
+
65
+ 2. **`fcvopt/optimizers/`** - Bayesian optimization algorithms
66
+ - `bayes_opt.py`: `BayesOpt` class - standard BO with full K-fold CV
67
+ - `fcvopt.py`: `FCVOpt` class - fractional CV optimizer (main algorithm)
68
+ - `active_learning.py`: `ActiveLearning` class using posterior variance reduction
69
+ - `cvrandopt.py`: `CVRandOpt` random search baseline
70
+ - `mtbo_cv.py`: `MTBO_CV` multi-task BO variant
71
+ - `optimize_acq.py`: Acquisition function optimization utilities
72
+
73
+ 3. **`fcvopt/models/`** - Gaussian Process models
74
+ - `gpregression.py`: `GPR` - base GP regression model
75
+ - `hmgp.py`: `HGP` - Hierarchical GP for modeling fold-wise correlations
76
+ - `multitaskgp.py`: Multi-task GP for multiple outputs
77
+ - `warp.py`: Input warping transformations
78
+ - `priors.py`: Custom GP priors (e.g., `BetaPrior`)
79
+
80
+ 4. **`fcvopt/kernels/`** - Custom GP kernels
81
+ - `hamming_kernel.py`: `HammingKernel` for categorical fold indices
82
+ - `constant_kernel.py`: Constant kernel
83
+ - `matern.py`: Matérn kernel implementations
84
+ - `multitaskkernel.py`: Multi-task covariance functions
85
+
86
+ 5. **`fcvopt/configspace.py`** - Extended `ConfigurationSpace` wrapper
87
+ - `latinhypercube_sample()` for initial design sampling
88
+ - Conversion between Configuration objects and numeric arrays
89
+
90
+ 6. **`fcvopt/fit/mll_scipy.py`** - GP hyperparameter optimization via scipy L-BFGS-B
91
+
92
+ 7. **`fcvopt/util/samplers.py`** - Sampling utilities (Latin Hypercube, stratified)
93
+
94
+ ### Key Design Patterns
95
+
96
+ **CVObjective Interface**: All CV objectives inherit from `CVObjective` and must implement:
97
+ - `construct_model(params)`: Return unfitted model instance
98
+ - `fit_and_test(params, train_index, test_index)`: Train and evaluate on one fold
99
+
100
+ **Optimizer Hierarchy**: `FCVOpt` extends `BayesOpt`, overriding:
101
+ - `_initialize()`: Assigns folds to initial random configurations
102
+ - `_acquisition()`: Adds fold selection after candidate selection
103
+ - `_construct_model()`: Uses `HGP` instead of standard GP
104
+ - `_select_fold_indices()`: Chooses folds via variance reduction or random sampling
105
+
106
+ **MLflow Integration**: Both `BayesOpt` and `FCVOpt` automatically track:
107
+ - Hyperparameter configurations and losses
108
+ - Model checkpoints (GP state)
109
+ - Iteration snapshots with acquisition values
110
+
111
+ ### Data Flow in FCVOpt
112
+
113
+ 1. **Initialization**: Sample `n_init` random configs, assign folds (random/stratified), evaluate
114
+ 2. **Model Fitting**: Fit hierarchical GP on `(x, fold_idx) → y` observations
115
+ 3. **Acquisition**: Optimize acquisition function to select next configuration
116
+ 4. **Fold Selection**: Choose fold via variance reduction or random
117
+ 5. **Evaluation**: Evaluate objective on selected (config, fold) pair
118
+ 6. **Repeat**: Update GP, select next candidate
119
+
120
+ ## Example Notebooks
121
+
122
+ Located in `examples/`:
123
+ - `01_Introduction_to_FCVOpt.ipynb` - Basic usage and concepts
124
+ - `02_Tuning_Lightgbm_Sklearn_API.ipynb` - LightGBM hyperparameter tuning
125
+ - `03_Extending_CVobjective.ipynb` - Creating custom CV objectives
126
+ - `04_Tuning_TabularResNet.ipynb` - TabResNet neural network tuning
127
+
128
+ ## Experiments
129
+
130
+ Experiments for reproducing paper results are in `experiments/`:
131
+ - `rf_high_dim/`: Random forest on high-dimensional data
132
+ - `xgb_class/`: XGBoost classification
133
+ - `boosted_reg/`: Gradient boosted regression
134
+ - `mlp/`: Multi-layer perceptron via PyTorch
135
+ - `tab_resnet/`: Tabular ResNet
136
+
137
+ Utility scripts:
138
+ - `generate_figures.py`: Create paper figures
139
+ - `reproduce_rf.sh`: Bash script to run RF experiments
140
+
141
+ ## Dependencies
142
+
143
+ **Core**: PyTorch (2.2.0), GPyTorch (1.9-1.10), BoTorch (>=0.8, <0.9), ConfigSpace (1.2.1), MLflow (>=3.0), scikit-learn, XGBoost, Skorch (0.13-0.15)
144
+
145
+ **Note on SMAC**: Experiments using SMAC require `pyrfr`, which needs a C++ compiler and `swig`. This is optional.
146
+
147
+ ## Common Gotchas
148
+
149
+ - **Categorical hyperparameters**: `latinhypercube_sample()` only supports binary-valued categoricals. Use `ConfigSpace.sample_configuration()` for general categoricals.
150
+
151
+ - **MLflow tracking**: Optimizers auto-initialize MLflow. To use custom tracking URI, set before creating optimizer or pass `tracking_uri` parameter.
152
+
153
+ - **Fold indices**: In `FCVOpt`, fold indices include repeats: with `n_folds=5` and `n_repeats=2`, valid fold indices are 0-9.
154
+
155
+ - **EI acquisition**: Not supported for `FCVOpt` - use `'LCB'` or `'KG'` instead.
156
+
157
+ - **LightGBM compatibility**: A fix has been applied in `fcvopt/fit/mll_scipy.py` to handle LightGBM's C++ objects. When `n_jobs=1` (default), only a single restart is used. When `n_jobs>1`, `ThreadPoolExecutor` is used instead of `joblib.Parallel` to avoid pickling issues. If you experience segfaults, clear `__pycache__` and reinstall: `pip install -e .`
158
+
159
+ ## Quick Usage Example
160
+
161
+ ```python
162
+ from fcvopt.optimizers import FCVOpt
163
+ from fcvopt.crossvalidation import SklearnCVObj
164
+ from fcvopt.configspace import ConfigurationSpace
165
+ from ConfigSpace import Float
166
+ from sklearn.ensemble import RandomForestClassifier
167
+ from sklearn.metrics import log_loss
168
+
169
+ # Define hyperparameter space
170
+ config = ConfigurationSpace(seed=42)
171
+ config.add(Float('max_features', bounds=(0.1, 1.0)))
172
+ config.add(Float('min_samples_leaf', bounds=(0.01, 0.1)))
173
+
174
+ # Create CV objective
175
+ cv_obj = SklearnCVObj(
176
+ estimator=RandomForestClassifier(n_estimators=100),
177
+ X=X_train, y=y_train,
178
+ task='classification',
179
+ loss_metric=log_loss,
180
+ n_splits=5
181
+ )
182
+
183
+ # Run fractional CV optimization
184
+ optimizer = FCVOpt(obj=cv_obj, config=config, n_folds=5)
185
+ best_config = optimizer.run(n_iter=20, n_init=5)
186
+ ```
@@ -0,0 +1,24 @@
1
+ FROM python:3.10
2
+
3
+ # Set the working directory in the container
4
+ WORKDIR /app
5
+
6
+ # Copy the current directory contents into the container at /app
7
+ # Note: The .dockerignore file is used to exclude certain files and directories
8
+ # from being copied. This includes the experiments directory which is not needed
9
+ # for installing the library. It is recommended to mount the experiments directory
10
+ # as a volume when running the container.
11
+ COPY . /app
12
+
13
+ # Upgrade pip
14
+ RUN pip install --upgrade pip
15
+
16
+ # Install the CPU only version of PyTorch (the index-url must be specified for Linux distributions)
17
+ RUN pip install torch==2.2.0 --index-url https://download.pytorch.org/whl/cpu
18
+
19
+ # Install the fcvopt library along with required dependencies
20
+ # and the extra dependencies for the experiments
21
+ RUN pip install .[experiments]
22
+
23
+ # Set the default command to run when the container starts
24
+ ENTRYPOINT ["/bin/bash"]
fcvopt-0.4.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Suraj Yerramilli
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
fcvopt-0.4.0/PKG-INFO ADDED
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.4
2
+ Name: fcvopt
3
+ Version: 0.4.0
4
+ Summary: Fractional K-fold cross-validation for hyperparameter optimization
5
+ Project-URL: Homepage, https://github.com/syerramilli/fcvopt
6
+ Project-URL: Documentation, https://syerramilli.github.io/fcvopt/
7
+ Project-URL: Repository, https://github.com/syerramilli/fcvopt.git
8
+ Project-URL: Issues, https://github.com/syerramilli/fcvopt/issues
9
+ Author: Daniel W. Apley
10
+ Author-email: Suraj Yerramilli <surajyerramilli@gmail.com>
11
+ Maintainer-email: Suraj Yerramilli <surajyerramilli@gmail.com>
12
+ License-Expression: MIT
13
+ License-File: LICENSE
14
+ Keywords: automl,bayesian-optimization,cross-validation,gaussian-processes,hyperparameter-optimization,machine-learning
15
+ Classifier: Development Status :: 4 - Beta
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: 3.13
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
+ Requires-Python: >=3.10
27
+ Requires-Dist: botorch>=0.12
28
+ Requires-Dist: configspace<2.0,>=1.0
29
+ Requires-Dist: gpytorch>=1.14
30
+ Requires-Dist: joblib>=1.3
31
+ Requires-Dist: mlflow>=2.10
32
+ Requires-Dist: numpy>=2.0
33
+ Requires-Dist: pandas>=2.2.2
34
+ Requires-Dist: scikit-learn>=1.4.2
35
+ Requires-Dist: scipy>=1.12
36
+ Requires-Dist: skorch>=0.15
37
+ Requires-Dist: torch>=2.3.1
38
+ Requires-Dist: xgboost<3,>=2.0.0
39
+ Provides-Extra: dev
40
+ Requires-Dist: black; extra == 'dev'
41
+ Requires-Dist: flake8; extra == 'dev'
42
+ Requires-Dist: isort; extra == 'dev'
43
+ Requires-Dist: pytest-cov; extra == 'dev'
44
+ Requires-Dist: pytest>=7.0; extra == 'dev'
45
+ Provides-Extra: docs
46
+ Requires-Dist: ipykernel; extra == 'docs'
47
+ Requires-Dist: nbsphinx; extra == 'docs'
48
+ Requires-Dist: sphinx-autodoc-typehints; extra == 'docs'
49
+ Requires-Dist: sphinx-rtd-theme; extra == 'docs'
50
+ Requires-Dist: sphinx>=4.0; extra == 'docs'
51
+ Requires-Dist: sphinxcontrib-napoleon; extra == 'docs'
52
+ Provides-Extra: experiments
53
+ Requires-Dist: lightgbm>=4.0; extra == 'experiments'
54
+ Requires-Dist: matplotlib>=3.7.0; extra == 'experiments'
55
+ Requires-Dist: optuna<4.0.0,>=3.6.0; extra == 'experiments'
56
+ Requires-Dist: seaborn>=0.12.2; extra == 'experiments'
57
+ Requires-Dist: smac<3.0,>=2.0; extra == 'experiments'
58
+ Description-Content-Type: text/markdown
59
+
60
+ # fcvopt: Fractional cross-validation for hyperparameter optimization
61
+
62
+ FCVOpt is a Python package for "Fractional Cross-Validation" in hyperparameter optimization. It implements efficient hyperparameter tuning by evaluating only a fraction of cross-validation folds using hierarchical Gaussian processes.
63
+
64
+ The documentation is available at [https://syerramilli.github.io/fcvopt/](https://syerramilli.github.io/fcvopt/).
65
+
66
+ 🚀 **Key Features**:
67
+
68
+ * **Efficient Optimization**: Evaluate hyperparameters using only a subset of CV folds
69
+ * **Hierarchical Gaussian Processes**: Model fold-wise correlations for better predictions
70
+ * **MLflow Integration**: Automatic experiment tracking and model versioning
71
+ * **Framework Support**: Scikit-learn, XGBoost, PyTorch (via Skorch), and more
72
+
73
+ ## Installation
74
+
75
+ ### From Source
76
+
77
+ ```{bash}
78
+ git clone https://github.com/syerramilli/fcvopt.git
79
+ cd fcvopt
80
+ pip install .
81
+ ```
82
+
83
+ **With optional dependencies**:
84
+
85
+ ```{bash}
86
+ pip install .[experiments] # For reproducing the results from the paper
87
+ pip install .[docs] # For building documentation
88
+ ```
89
+
90
+ ## Citing
91
+ If you use this code in your research, please cite the following paper:
92
+
93
+ ```
94
+ @article{yerramilli2025fractional,
95
+ author = {Suraj Yerramilli and Daniel W. Apley},
96
+ title = {Fractional Cross-Validation for Optimizing Hyperparameters of Supervised Learning Algorithms},
97
+ journal = {Technometrics},
98
+ year = {2025},
99
+ doi = {10.1080/00401706.2025.2515926},
100
+ }
101
+ ```
102
+
103
+ ## Reproducing the experiment results from the paper
104
+
105
+ The experiments are all contained in the `experiments` folder. Each subdirectory within this folder contains script files to run each case study in the paper. Refer to the README file within each of the subdirectories for instructions to run the files.
106
+
107
+ For reproducibility, we provide two options for setting up the environment to run the experiments: a virtual environment using `venv` and a Docker container.
108
+
109
+ ### Setting up a virtual environment
110
+
111
+ The bash script file `venv_setup.sh` can be used to create a virtual environment and install the required packages. Ensure you have Python >= 3.8 and <=3.12 installed.
112
+
113
+ To run the script, use the following commands:
114
+
115
+ ```{bash}
116
+ chmod +x venv_setup.sh
117
+ ./venv_setup.sh
118
+ ```
119
+
120
+ **Note:**
121
+ The experiments involving the SMAC algorithm require the `smac` library, which in turn requires building and compiling the `pyrfr` package. While the main functions of `fcvopt` do not depend on `pyrfr`, you might encounter build issues during its installation if you do not have a C++ compiler and the `swig` binary installed on your system.
122
+
123
+ ### Setting up a Docker container
124
+
125
+ The Dockerfile is provided to run the experiments in a container with the `fcvopt` package and all the required dependencies. The Dockerfile is based on the Python 3.10 debian image. To build the image, run the following command:
126
+
127
+ ```{bash}
128
+ docker build -t fcvopt_test .
129
+ ```
130
+
131
+ To run the container with the files in the `experiments` folder mounted, run the following command:
132
+
133
+ ```{bash}
134
+ docker run -v <path_to_experiments_folder>:/app/experiments -it fcvopt_test
135
+ ```
136
+
137
+ This will launch the container and open a bash shell. The experiments directory will be mounted in the container at `/app/experiments`. Mounting the directory allows you to access the files in the experiments folder from within the container, and any changes made to the files will be reflected in your local directory. Replace <path_to_experiments_folder> with the **absolute path** to your local experiments directory. Relative paths will not work, as the container will not have access to your local file system. On Linux and MacOS, you can use the $(pwd) command to get the absolute path of the current directory. For example:
138
+
139
+ ```{bash}
140
+ docker run -v $(pwd)/experiments:/app/experiments -it fcvopt_test
141
+ ```
142
+
143
+ Once inside the container, you can navigate to the /app/experiments directory and run the experiments as needed. For example:
144
+
145
+ ```{bash}
146
+ cd experiments
147
+ bash reproduce_rf.sh
148
+ ```
149
+
150
+ **Note:** On Ubuntu/Debian, you may need administrative privileges to run the Docker commands. You can do this by adding `sudo` before the command.
fcvopt-0.4.0/README.md ADDED
@@ -0,0 +1,91 @@
1
+ # fcvopt: Fractional cross-validation for hyperparameter optimization
2
+
3
+ FCVOpt is a Python package for "Fractional Cross-Validation" in hyperparameter optimization. It implements efficient hyperparameter tuning by evaluating only a fraction of cross-validation folds using hierarchical Gaussian processes.
4
+
5
+ The documentation is available at [https://syerramilli.github.io/fcvopt/](https://syerramilli.github.io/fcvopt/).
6
+
7
+ 🚀 **Key Features**:
8
+
9
+ * **Efficient Optimization**: Evaluate hyperparameters using only a subset of CV folds
10
+ * **Hierarchical Gaussian Processes**: Model fold-wise correlations for better predictions
11
+ * **MLflow Integration**: Automatic experiment tracking and model versioning
12
+ * **Framework Support**: Scikit-learn, XGBoost, PyTorch (via Skorch), and more
13
+
14
+ ## Installation
15
+
16
+ ### From Source
17
+
18
+ ```{bash}
19
+ git clone https://github.com/syerramilli/fcvopt.git
20
+ cd fcvopt
21
+ pip install .
22
+ ```
23
+
24
+ **With optional dependencies**:
25
+
26
+ ```{bash}
27
+ pip install .[experiments] # For reproducing the results from the paper
28
+ pip install .[docs] # For building documentation
29
+ ```
30
+
31
+ ## Citing
32
+ If you use this code in your research, please cite the following paper:
33
+
34
+ ```
35
+ @article{yerramilli2025fractional,
36
+ author = {Suraj Yerramilli and Daniel W. Apley},
37
+ title = {Fractional Cross-Validation for Optimizing Hyperparameters of Supervised Learning Algorithms},
38
+ journal = {Technometrics},
39
+ year = {2025},
40
+ doi = {10.1080/00401706.2025.2515926},
41
+ }
42
+ ```
43
+
44
+ ## Reproducing the experiment results from the paper
45
+
46
+ The experiments are all contained in the `experiments` folder. Each subdirectory within this folder contains script files to run each case study in the paper. Refer to the README file within each of the subdirectories for instructions to run the files.
47
+
48
+ For reproducibility, we provide two options for setting up the environment to run the experiments: a virtual environment using `venv` and a Docker container.
49
+
50
+ ### Setting up a virtual environment
51
+
52
+ The bash script file `venv_setup.sh` can be used to create a virtual environment and install the required packages. Ensure you have Python >= 3.8 and <=3.12 installed.
53
+
54
+ To run the script, use the following commands:
55
+
56
+ ```{bash}
57
+ chmod +x venv_setup.sh
58
+ ./venv_setup.sh
59
+ ```
60
+
61
+ **Note:**
62
+ The experiments involving the SMAC algorithm require the `smac` library, which in turn requires building and compiling the `pyrfr` package. While the main functions of `fcvopt` do not depend on `pyrfr`, you might encounter build issues during its installation if you do not have a C++ compiler and the `swig` binary installed on your system.
63
+
64
+ ### Setting up a Docker container
65
+
66
+ The Dockerfile is provided to run the experiments in a container with the `fcvopt` package and all the required dependencies. The Dockerfile is based on the Python 3.10 debian image. To build the image, run the following command:
67
+
68
+ ```{bash}
69
+ docker build -t fcvopt_test .
70
+ ```
71
+
72
+ To run the container with the files in the `experiments` folder mounted, run the following command:
73
+
74
+ ```{bash}
75
+ docker run -v <path_to_experiments_folder>:/app/experiments -it fcvopt_test
76
+ ```
77
+
78
+ This will launch the container and open a bash shell. The experiments directory will be mounted in the container at `/app/experiments`. Mounting the directory allows you to access the files in the experiments folder from within the container, and any changes made to the files will be reflected in your local directory. Replace <path_to_experiments_folder> with the **absolute path** to your local experiments directory. Relative paths will not work, as the container will not have access to your local file system. On Linux and MacOS, you can use the $(pwd) command to get the absolute path of the current directory. For example:
79
+
80
+ ```{bash}
81
+ docker run -v $(pwd)/experiments:/app/experiments -it fcvopt_test
82
+ ```
83
+
84
+ Once inside the container, you can navigate to the /app/experiments directory and run the experiments as needed. For example:
85
+
86
+ ```{bash}
87
+ cd experiments
88
+ bash reproduce_rf.sh
89
+ ```
90
+
91
+ **Note:** On Ubuntu/Debian, you may need administrative privileges to run the Docker commands. You can do this by adding `sudo` before the command.
@@ -0,0 +1,20 @@
1
+ # Minimal makefile for Sphinx documentation
2
+ #
3
+
4
+ # You can set these variables from the command line, and also
5
+ # from the environment for the first two.
6
+ SPHINXOPTS ?=
7
+ SPHINXBUILD ?= sphinx-build
8
+ SOURCEDIR = source
9
+ BUILDDIR = build
10
+
11
+ # Put it first so that "make" without argument is like "make help".
12
+ help:
13
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14
+
15
+ .PHONY: help Makefile
16
+
17
+ # Catch-all target: route all unknown targets to Sphinx using the new
18
+ # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19
+ %: Makefile
20
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)