OptiRoulette 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optiroulette-0.1.0/LICENSE +21 -0
- optiroulette-0.1.0/MANIFEST.in +4 -0
- optiroulette-0.1.0/PKG-INFO +201 -0
- optiroulette-0.1.0/README.md +168 -0
- optiroulette-0.1.0/THIRD_PARTY_LICENSES.md +15 -0
- optiroulette-0.1.0/pyproject.toml +55 -0
- optiroulette-0.1.0/setup.cfg +4 -0
- optiroulette-0.1.0/src/OptiRoulette/__init__.py +4 -0
- optiroulette-0.1.0/src/OptiRoulette.egg-info/PKG-INFO +201 -0
- optiroulette-0.1.0/src/OptiRoulette.egg-info/SOURCES.txt +21 -0
- optiroulette-0.1.0/src/OptiRoulette.egg-info/dependency_links.txt +1 -0
- optiroulette-0.1.0/src/OptiRoulette.egg-info/requires.txt +9 -0
- optiroulette-0.1.0/src/OptiRoulette.egg-info/top_level.txt +2 -0
- optiroulette-0.1.0/src/optiroulette/__init__.py +24 -0
- optiroulette-0.1.0/src/optiroulette/compatibility.py +161 -0
- optiroulette-0.1.0/src/optiroulette/defaults.py +101 -0
- optiroulette-0.1.0/src/optiroulette/opti_roulette.py +612 -0
- optiroulette-0.1.0/src/optiroulette/optimizer_factory.py +150 -0
- optiroulette-0.1.0/src/optiroulette/optimizer_pool.py +380 -0
- optiroulette-0.1.0/src/optiroulette/resources/__init__.py +2 -0
- optiroulette-0.1.0/src/optiroulette/resources/optimized.yaml +685 -0
- optiroulette-0.1.0/tests/test_defaults.py +33 -0
- optiroulette-0.1.0/tests/test_smoke.py +39 -0
optiroulette-0.1.0/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Stamatis Mastromichalakis

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

optiroulette-0.1.0/PKG-INFO
@@ -0,0 +1,201 @@
Metadata-Version: 2.4
Name: OptiRoulette
Version: 0.1.0
Summary: PyTorch optimizer with random switching, LR scaling and pool swaps
Author: Stamatis Mastromichalakis
License: MIT
Project-URL: Homepage, https://github.com/MStamatis/OptiRoulette
Project-URL: Repository, https://github.com/MStamatis/OptiRoulette
Keywords: pytorch,optimizer,meta optimizer,deep-learning,training,optimization
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: torch>=2.1.0
Requires-Dist: numpy>=1.23.0
Requires-Dist: PyYAML>=6.0
Requires-Dist: pytorch-optimizer>=3.7.0
Provides-Extra: dev
Requires-Dist: build>=1.2.1; extra == "dev"
Requires-Dist: twine>=5.1.1; extra == "dev"
Requires-Dist: pytest>=8.2.0; extra == "dev"
Dynamic: license-file

# OptiRoulette Optimizer

This repository accompanies the paper "OptiRoulette Optimizer: A New Stochastic
Meta-Optimizer for up to 5.3x Faster Convergence".

A standalone, pip-installable PyTorch meta-optimizer that brings OptiRoulette's training logic to any project:
- random optimizer switching
- warmup -> roulette phase handling
- optimizer pool with active/backup swapping
- compatibility-aware replacement
- learning-rate scaling rules when switching
- momentum/state transfer on swap

The default behavior is loaded from the bundled `optimized.yaml` profile (same optimizer pool logic used in this project).

## Research Highlights

Based on the current paper draft, OptiRoulette is a stochastic meta-optimizer
that combines:
- warmup optimizer locking
- randomized sampling from an active optimizer pool
- compatibility-aware LR scaling during optimizer transitions
- failure-aware pool replacement

Reported mean test accuracy vs a single-optimizer AdamW baseline:

| Dataset | AdamW | OptiRoulette | Delta |
|---|---:|---:|---:|
| CIFAR-100 | 0.6734 | 0.7656 | +9.22 pp |
| CIFAR-100-C | 0.2904 | 0.3355 | +4.52 pp |
| SVHN | 0.9667 | 0.9756 | +0.89 pp |
| Tiny ImageNet | 0.5669 | 0.6642 | +9.73 pp |
| Caltech-256 | 0.5946 | 0.6920 | +9.74 pp |

Additional paper-reported highlights:
- Target-hit reliability: in the reported 10-seed suites, OptiRoulette reaches
  key validation targets in 10/10 runs, while the AdamW baseline reaches none
  of those targets within budget.
- Faster time-to-target on shared milestones (example: Caltech-256 @ 0.59,
  25.7 vs 77.0 epochs), with budget-capped lower-bound speedups up to 5.3x for
  non-attained baseline targets.
- Paired-seed analysis is positive across datasets, except CIFAR-100-C test
  ROC-AUC, which is not statistically significant in the current 10-seed study.

## Install

```bash
pip install OptiRoulette
```

## Examples

- [CIFAR-100 demo notebook](examples/quick_cifar100_optiroulette.ipynb)
- [Tiny-ImageNet demo notebook](examples/quick_tiny_imagenet_optiroulette.ipynb)

## Quick Use

```python
import torch
from optiroulette import OptiRoulette

model = torch.nn.Linear(128, 10)
optimizer = OptiRoulette(model.parameters())

for epoch in range(5):
    optimizer.on_epoch_start(epoch)

    for batch_idx in range(100):
        optimizer.on_batch_start(batch_idx)
        optimizer.zero_grad()
        x = torch.randn(32, 128)
        y = torch.randint(0, 10, (32,))
        loss = torch.nn.functional.cross_entropy(model(x), y)
        loss.backward()
        optimizer.step()

    # pass validation accuracy for warmup plateau logic (optional)
    optimizer.on_epoch_end(val_acc=0.6)
```

## API

```python
from optiroulette import (
    OptiRoulette,
    OptiRouletteOptimizer,
    PoolConfig,
    get_default_config,
    get_default_seed,
    get_default_optimizer_specs,
    get_default_pool_setup,
    get_default_roulette_config,
)
```

## Configuration Reference

For a full settings guide (constructor arguments, `optimizer_specs`,
`pool_config`, warmup/roulette options, and defaults precedence), see:
- `docs/configuration.md`

For package maintainers (release/publish steps), see:
- `docs/release.md`

### Defaults behavior

`OptiRoulette(model.parameters())` uses:
- default optimizer specs from bundled `optimized.yaml`
- default roulette settings from bundled `optimized.yaml`
- default pool config + active/backup names from bundled `optimized.yaml`
- default LR scaling rules from bundled `optimized.yaml`
- default optimizer RNG seed from bundled `optimized.yaml` (`system.seed`, fallback `42`)

If you provide manual optimizer/pool settings, those are used instead of defaults:

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={"adam": {"lr": 1e-3}},
)
```

Manual custom pool example (only your chosen optimizers are used):

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={
        "adam": {"lr": 1e-3},
        "adamw": {"lr": 8e-4, "weight_decay": 0.01},
        "lion": {"lr": 1e-4, "betas": (0.9, 0.99)},
    },
    active_names=["adam", "adamw"],
    backup_names=["lion"],
)
```

Optional: override pool behavior too:

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={
        "adam": {"lr": 1e-3},
        "adamw": {"lr": 8e-4, "weight_decay": 0.01},
        "lion": {"lr": 1e-4, "betas": (0.9, 0.99)},
    },
    pool_config={
        "num_active": 2,
        "num_backup": 1,
        "failure_threshold": -0.2,
        "consecutive_failure_limit": 3,
    },
    active_names=["adam", "adamw"],
    backup_names=["lion"],
)
```

## Third-Party Dependencies

This package depends on `pytorch-optimizer` for additional optimizer implementations.
See `THIRD_PARTY_LICENSES.md` for a short third-party license notice.

## Disclaimer

The OptiRoulette name refers exclusively to a machine-learning optimizer and has no
affiliation, sponsorship, or technical relation to roulette manufacturers, casinos,
or any physical/software gambling products or services.

optiroulette-0.1.0/README.md
@@ -0,0 +1,168 @@
# OptiRoulette Optimizer

This repository accompanies the paper "OptiRoulette Optimizer: A New Stochastic
Meta-Optimizer for up to 5.3x Faster Convergence".

A standalone, pip-installable PyTorch meta-optimizer that brings OptiRoulette's training logic to any project:
- random optimizer switching
- warmup -> roulette phase handling
- optimizer pool with active/backup swapping
- compatibility-aware replacement
- learning-rate scaling rules when switching
- momentum/state transfer on swap

The default behavior is loaded from the bundled `optimized.yaml` profile (same optimizer pool logic used in this project).
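
To make the switching idea concrete, here is a minimal, self-contained sketch of per-epoch roulette switching with a learning-rate scaling rule applied on each transition. It is illustrative only: the optimizer pool contents, the scale factors, and the epoch-level switching point are assumptions for this sketch, not the package's actual logic.

```python
import random

import torch

# Illustrative sketch only: per-epoch random optimizer switching with an
# LR-scaling rule applied on each transition. Pool contents, scale factors
# and switching granularity below are assumptions for this sketch.
model = torch.nn.Linear(128, 10)
pool = {
    "adam":  lambda params: torch.optim.Adam(params, lr=1e-3),
    "adamw": lambda params: torch.optim.AdamW(params, lr=1e-3),
    "sgd":   lambda params: torch.optim.SGD(params, lr=1e-2, momentum=0.9),
}
lr_scale = {("adam", "sgd"): 10.0, ("sgd", "adam"): 0.1}  # assumed scaling rules

rng = random.Random(42)          # seeded roulette RNG
current = "adam"                 # initial (warmup) choice
optimizer = pool[current](model.parameters())

for epoch in range(5):
    pick = rng.choice(sorted(pool))              # roulette pick for this epoch
    if pick != current:
        scale = lr_scale.get((current, pick), 1.0)
        optimizer = pool[pick](model.parameters())
        for group in optimizer.param_groups:     # apply LR rule on the switch
            group["lr"] *= scale
        current = pick

    for _ in range(10):                          # dummy training steps
        x, y = torch.randn(32, 128), torch.randint(0, 10, (32,))
        loss = torch.nn.functional.cross_entropy(model(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```

OptiRoulette layers warmup locking, active/backup pool swaps, compatibility-aware replacement, and momentum/state transfer on top of this basic switching loop.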

## Research Highlights

Based on the current paper draft, OptiRoulette is a stochastic meta-optimizer
that combines:
- warmup optimizer locking
- randomized sampling from an active optimizer pool
- compatibility-aware LR scaling during optimizer transitions
- failure-aware pool replacement

Reported mean test accuracy vs a single-optimizer AdamW baseline:

| Dataset | AdamW | OptiRoulette | Delta |
|---|---:|---:|---:|
| CIFAR-100 | 0.6734 | 0.7656 | +9.22 pp |
| CIFAR-100-C | 0.2904 | 0.3355 | +4.52 pp |
| SVHN | 0.9667 | 0.9756 | +0.89 pp |
| Tiny ImageNet | 0.5669 | 0.6642 | +9.73 pp |
| Caltech-256 | 0.5946 | 0.6920 | +9.74 pp |

Additional paper-reported highlights:
- Target-hit reliability: in the reported 10-seed suites, OptiRoulette reaches
  key validation targets in 10/10 runs, while the AdamW baseline reaches none
  of those targets within budget.
- Faster time-to-target on shared milestones (example: Caltech-256 @ 0.59,
  25.7 vs 77.0 epochs), with budget-capped lower-bound speedups up to 5.3x for
  non-attained baseline targets.
- Paired-seed analysis is positive across datasets, except CIFAR-100-C test
  ROC-AUC, which is not statistically significant in the current 10-seed study.

## Install

```bash
pip install OptiRoulette
```

## Examples

- [CIFAR-100 demo notebook](examples/quick_cifar100_optiroulette.ipynb)
- [Tiny-ImageNet demo notebook](examples/quick_tiny_imagenet_optiroulette.ipynb)

## Quick Use

```python
import torch
from optiroulette import OptiRoulette

model = torch.nn.Linear(128, 10)
optimizer = OptiRoulette(model.parameters())

for epoch in range(5):
    optimizer.on_epoch_start(epoch)

    for batch_idx in range(100):
        optimizer.on_batch_start(batch_idx)
        optimizer.zero_grad()
        x = torch.randn(32, 128)
        y = torch.randint(0, 10, (32,))
        loss = torch.nn.functional.cross_entropy(model(x), y)
        loss.backward()
        optimizer.step()

    # pass validation accuracy for warmup plateau logic (optional)
    optimizer.on_epoch_end(val_acc=0.6)
```

## API

```python
from optiroulette import (
    OptiRoulette,
    OptiRouletteOptimizer,
    PoolConfig,
    get_default_config,
    get_default_seed,
    get_default_optimizer_specs,
    get_default_pool_setup,
    get_default_roulette_config,
)
```

## Configuration Reference

For a full settings guide (constructor arguments, `optimizer_specs`,
`pool_config`, warmup/roulette options, and defaults precedence), see:
- `docs/configuration.md`

For package maintainers (release/publish steps), see:
- `docs/release.md`

### Defaults behavior

`OptiRoulette(model.parameters())` uses:
- default optimizer specs from bundled `optimized.yaml`
- default roulette settings from bundled `optimized.yaml`
- default pool config + active/backup names from bundled `optimized.yaml`
- default LR scaling rules from bundled `optimized.yaml`
- default optimizer RNG seed from bundled `optimized.yaml` (`system.seed`, fallback `42`)
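
To see exactly what those defaults are before overriding them, the exported `get_default_*` helpers can be used. A minimal sketch, assuming the helpers take no arguments and return plain Python structures read from the bundled `optimized.yaml`:

```python
from optiroulette import (
    get_default_optimizer_specs,
    get_default_pool_setup,
    get_default_roulette_config,
    get_default_seed,
)

# Assumption: each helper reads the bundled optimized.yaml and returns plain data.
specs = get_default_optimizer_specs()      # per-optimizer hyperparameter specs
pool_setup = get_default_pool_setup()      # pool config + active/backup names
roulette = get_default_roulette_config()   # warmup/roulette settings
seed = get_default_seed()                  # system.seed, fallback 42

print("default optimizers:", sorted(specs))
print("default pool setup:", pool_setup)
print("default seed:", seed)
```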

If you provide manual optimizer/pool settings, those are used instead of defaults:

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={"adam": {"lr": 1e-3}},
)
```

Manual custom pool example (only your chosen optimizers are used):

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={
        "adam": {"lr": 1e-3},
        "adamw": {"lr": 8e-4, "weight_decay": 0.01},
        "lion": {"lr": 1e-4, "betas": (0.9, 0.99)},
    },
    active_names=["adam", "adamw"],
    backup_names=["lion"],
)
```

Optional: override pool behavior too:

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={
        "adam": {"lr": 1e-3},
        "adamw": {"lr": 8e-4, "weight_decay": 0.01},
        "lion": {"lr": 1e-4, "betas": (0.9, 0.99)},
    },
    pool_config={
        "num_active": 2,
        "num_backup": 1,
        "failure_threshold": -0.2,
        "consecutive_failure_limit": 3,
    },
    active_names=["adam", "adamw"],
    backup_names=["lion"],
)
```

## Third-Party Dependencies

This package depends on `pytorch-optimizer` for additional optimizer implementations.
See `THIRD_PARTY_LICENSES.md` for a short third-party license notice.

## Disclaimer

The OptiRoulette name refers exclusively to a machine-learning optimizer and has no
affiliation, sponsorship, or technical relation to roulette manufacturers, casinos,
or any physical/software gambling products or services.

optiroulette-0.1.0/THIRD_PARTY_LICENSES.md
@@ -0,0 +1,15 @@
# Third-Party Licenses

This project uses the following third-party dependency:

## pytorch-optimizer

- Package: `pytorch-optimizer`
- Homepage: https://github.com/kozistr/pytorch_optimizer
- PyPI: https://pypi.org/project/pytorch-optimizer/
- Declared license (project metadata): Apache-2.0

Usage note:
- `OptiRoulette` imports `pytorch-optimizer` as a runtime dependency to access
  additional optimizer classes.

optiroulette-0.1.0/pyproject.toml
@@ -0,0 +1,55 @@
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "OptiRoulette"
version = "0.1.0"
description = "PyTorch optimizer with random switching, LR scaling and pool swaps"
readme = "README.md"
requires-python = ">=3.8"
license = { text = "MIT" }
authors = [
    { name = "Stamatis Mastromichalakis" }
]
keywords = ["pytorch", "optimizer", "meta optimizer", "deep-learning", "training", "optimization"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = [
    "torch>=2.1.0",
    "numpy>=1.23.0",
    "PyYAML>=6.0",
    "pytorch-optimizer>=3.7.0",
]

[project.urls]
Homepage = "https://github.com/MStamatis/OptiRoulette"
Repository = "https://github.com/MStamatis/OptiRoulette"

[project.optional-dependencies]
dev = [
    "build>=1.2.1",
    "twine>=5.1.1",
    "pytest>=8.2.0",
]

[tool.setuptools]
package-dir = {"" = "src"}
include-package-data = true

[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
"optiroulette.resources" = ["optimized.yaml"]
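
The `[tool.setuptools.package-data]` entry above is what ships `optimized.yaml` inside the built package under `optiroulette/resources/`. A minimal sketch of reading that bundled profile directly with `importlib.resources` (Python 3.9+ API shown; the package's own `get_default_config()` presumably wraps something like this):

```python
from importlib import resources

import yaml

# Read the bundled default profile from the installed package (sketch only;
# assumes the package-data layout declared in pyproject.toml above).
profile_path = resources.files("optiroulette.resources") / "optimized.yaml"
with profile_path.open("r", encoding="utf-8") as f:
    profile = yaml.safe_load(f)

# system.seed is the documented default RNG seed (fallback 42).
print(profile.get("system", {}).get("seed", 42))
```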

optiroulette-0.1.0/src/OptiRoulette.egg-info/PKG-INFO
@@ -0,0 +1,201 @@
Metadata-Version: 2.4
Name: OptiRoulette
Version: 0.1.0
Summary: PyTorch optimizer with random switching, LR scaling and pool swaps
Author: Stamatis Mastromichalakis
License: MIT
Project-URL: Homepage, https://github.com/MStamatis/OptiRoulette
Project-URL: Repository, https://github.com/MStamatis/OptiRoulette
Keywords: pytorch,optimizer,meta optimizer,deep-learning,training,optimization
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: torch>=2.1.0
Requires-Dist: numpy>=1.23.0
Requires-Dist: PyYAML>=6.0
Requires-Dist: pytorch-optimizer>=3.7.0
Provides-Extra: dev
Requires-Dist: build>=1.2.1; extra == "dev"
Requires-Dist: twine>=5.1.1; extra == "dev"
Requires-Dist: pytest>=8.2.0; extra == "dev"
Dynamic: license-file

# OptiRoulette Optimizer

This repository accompanies the paper "OptiRoulette Optimizer: A New Stochastic
Meta-Optimizer for up to 5.3x Faster Convergence".

A standalone, pip-installable PyTorch meta-optimizer that brings OptiRoulette's training logic to any project:
- random optimizer switching
- warmup -> roulette phase handling
- optimizer pool with active/backup swapping
- compatibility-aware replacement
- learning-rate scaling rules when switching
- momentum/state transfer on swap

The default behavior is loaded from the bundled `optimized.yaml` profile (same optimizer pool logic used in this project).

## Research Highlights

Based on the current paper draft, OptiRoulette is a stochastic meta-optimizer
that combines:
- warmup optimizer locking
- randomized sampling from an active optimizer pool
- compatibility-aware LR scaling during optimizer transitions
- failure-aware pool replacement

Reported mean test accuracy vs a single-optimizer AdamW baseline:

| Dataset | AdamW | OptiRoulette | Delta |
|---|---:|---:|---:|
| CIFAR-100 | 0.6734 | 0.7656 | +9.22 pp |
| CIFAR-100-C | 0.2904 | 0.3355 | +4.52 pp |
| SVHN | 0.9667 | 0.9756 | +0.89 pp |
| Tiny ImageNet | 0.5669 | 0.6642 | +9.73 pp |
| Caltech-256 | 0.5946 | 0.6920 | +9.74 pp |

Additional paper-reported highlights:
- Target-hit reliability: in the reported 10-seed suites, OptiRoulette reaches
  key validation targets in 10/10 runs, while the AdamW baseline reaches none
  of those targets within budget.
- Faster time-to-target on shared milestones (example: Caltech-256 @ 0.59,
  25.7 vs 77.0 epochs), with budget-capped lower-bound speedups up to 5.3x for
  non-attained baseline targets.
- Paired-seed analysis is positive across datasets, except CIFAR-100-C test
  ROC-AUC, which is not statistically significant in the current 10-seed study.

## Install

```bash
pip install OptiRoulette
```

## Examples

- [CIFAR-100 demo notebook](examples/quick_cifar100_optiroulette.ipynb)
- [Tiny-ImageNet demo notebook](examples/quick_tiny_imagenet_optiroulette.ipynb)

## Quick Use

```python
import torch
from optiroulette import OptiRoulette

model = torch.nn.Linear(128, 10)
optimizer = OptiRoulette(model.parameters())

for epoch in range(5):
    optimizer.on_epoch_start(epoch)

    for batch_idx in range(100):
        optimizer.on_batch_start(batch_idx)
        optimizer.zero_grad()
        x = torch.randn(32, 128)
        y = torch.randint(0, 10, (32,))
        loss = torch.nn.functional.cross_entropy(model(x), y)
        loss.backward()
        optimizer.step()

    # pass validation accuracy for warmup plateau logic (optional)
    optimizer.on_epoch_end(val_acc=0.6)
```

## API

```python
from optiroulette import (
    OptiRoulette,
    OptiRouletteOptimizer,
    PoolConfig,
    get_default_config,
    get_default_seed,
    get_default_optimizer_specs,
    get_default_pool_setup,
    get_default_roulette_config,
)
```

## Configuration Reference

For a full settings guide (constructor arguments, `optimizer_specs`,
`pool_config`, warmup/roulette options, and defaults precedence), see:
- `docs/configuration.md`

For package maintainers (release/publish steps), see:
- `docs/release.md`

### Defaults behavior

`OptiRoulette(model.parameters())` uses:
- default optimizer specs from bundled `optimized.yaml`
- default roulette settings from bundled `optimized.yaml`
- default pool config + active/backup names from bundled `optimized.yaml`
- default LR scaling rules from bundled `optimized.yaml`
- default optimizer RNG seed from bundled `optimized.yaml` (`system.seed`, fallback `42`)

If you provide manual optimizer/pool settings, those are used instead of defaults:

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={"adam": {"lr": 1e-3}},
)
```

Manual custom pool example (only your chosen optimizers are used):

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={
        "adam": {"lr": 1e-3},
        "adamw": {"lr": 8e-4, "weight_decay": 0.01},
        "lion": {"lr": 1e-4, "betas": (0.9, 0.99)},
    },
    active_names=["adam", "adamw"],
    backup_names=["lion"],
)
```

Optional: override pool behavior too:

```python
optimizer = OptiRoulette(
    model.parameters(),
    optimizer_specs={
        "adam": {"lr": 1e-3},
        "adamw": {"lr": 8e-4, "weight_decay": 0.01},
        "lion": {"lr": 1e-4, "betas": (0.9, 0.99)},
    },
    pool_config={
        "num_active": 2,
        "num_backup": 1,
        "failure_threshold": -0.2,
        "consecutive_failure_limit": 3,
    },
    active_names=["adam", "adamw"],
    backup_names=["lion"],
)
```

## Third-Party Dependencies

This package depends on `pytorch-optimizer` for additional optimizer implementations.
See `THIRD_PARTY_LICENSES.md` for a short third-party license notice.

## Disclaimer

The OptiRoulette name refers exclusively to a machine-learning optimizer and has no
affiliation, sponsorship, or technical relation to roulette manufacturers, casinos,
or any physical/software gambling products or services.