kusp-dial 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kusp_dial-0.1.0/LICENSE +21 -0
- kusp_dial-0.1.0/PKG-INFO +214 -0
- kusp_dial-0.1.0/README.md +185 -0
- kusp_dial-0.1.0/pyproject.toml +59 -0
- kusp_dial-0.1.0/setup.cfg +4 -0
- kusp_dial-0.1.0/src/kusp_dial.egg-info/PKG-INFO +214 -0
- kusp_dial-0.1.0/src/kusp_dial.egg-info/SOURCES.txt +15 -0
- kusp_dial-0.1.0/src/kusp_dial.egg-info/dependency_links.txt +1 -0
- kusp_dial-0.1.0/src/kusp_dial.egg-info/requires.txt +6 -0
- kusp_dial-0.1.0/src/kusp_dial.egg-info/top_level.txt +1 -0
- kusp_dial-0.1.0/src/thompson_bandits/__init__.py +28 -0
- kusp_dial-0.1.0/src/thompson_bandits/bandit.py +152 -0
- kusp_dial-0.1.0/src/thompson_bandits/stores.py +259 -0
- kusp_dial-0.1.0/src/thompson_bandits/types.py +76 -0
- kusp_dial-0.1.0/tests/test_bandit.py +225 -0
- kusp_dial-0.1.0/tests/test_integration.py +167 -0
- kusp_dial-0.1.0/tests/test_stores.py +194 -0
kusp_dial-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alfonso DiRocco
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
kusp_dial-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kusp-dial
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Online weight optimization via Thompson Sampling — learns optimal configurations from outcome feedback.
|
|
5
|
+
Author-email: Alfonso DiRocco <alfonso@kusp.dev>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/fonz-ai/dial
|
|
8
|
+
Project-URL: Repository, https://github.com/fonz-ai/dial
|
|
9
|
+
Project-URL: Issues, https://github.com/fonz-ai/dial/issues
|
|
10
|
+
Keywords: thompson-sampling,bandits,multi-armed-bandits,optimization,online-learning,retrieval,reinforcement-learning
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# dial
|
|
31
|
+
|
|
32
|
+
**Online weight optimization via Thompson Sampling.** Learns optimal configurations from outcome feedback — no grid search, no manual tuning. Converges in ~50 observations. [+41% NDCG@5](https://github.com/kusp-dev/retrieval-weight-experiment) over fixed-weight baselines in controlled experiments.
|
|
33
|
+
|
|
34
|
+
[](https://github.com/fonz-ai/dial/actions/workflows/ci.yml)
|
|
35
|
+
[](https://www.python.org/downloads/)
|
|
36
|
+
[](LICENSE)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install kusp-dial
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Quick start
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from thompson_bandits import ThompsonBandit, InMemoryStore
|
|
46
|
+
|
|
47
|
+
store = InMemoryStore(arm_ids=["relevance_heavy", "balanced", "recency_heavy"])
|
|
48
|
+
bandit = ThompsonBandit(store)
|
|
49
|
+
|
|
50
|
+
# Run the loop: select → observe → update
|
|
51
|
+
for query in queries:
|
|
52
|
+
arm = bandit.select()
|
|
53
|
+
reward = run_query(query, strategy=arm)
|
|
54
|
+
bandit.update(arm, reward=reward)
|
|
55
|
+
|
|
56
|
+
print(bandit.get_summary())
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
After 50 iterations:
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
BanditSummary(
|
|
63
|
+
best_arm='relevance_heavy',
|
|
64
|
+
total_pulls=50,
|
|
65
|
+
arms=[
|
|
66
|
+
ArmSummary(arm_id='balanced', mean=0.5765, pulls=11),
|
|
67
|
+
ArmSummary(arm_id='recency_heavy', mean=0.4210, pulls=8),
|
|
68
|
+
ArmSummary(arm_id='relevance_heavy', mean=0.8903, pulls=31),
|
|
69
|
+
]
|
|
70
|
+
)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The bandit explores all three options early, then converges — 31 of 50 pulls on the winner, without you telling it which arm is best.
|
|
74
|
+
|
|
75
|
+
## Why Dial?
|
|
76
|
+
|
|
77
|
+
**vs. grid search / random search** — Those require running every combination upfront. Dial learns online, one observation at a time. No batch experiments needed.
|
|
78
|
+
|
|
79
|
+
**vs. manual tuning** — Manual weights are a guess that stays frozen. Dial adapts when the best option shifts — user behavior drifts, data distributions change, what worked in January fails in March.
|
|
80
|
+
|
|
81
|
+
**vs. contextual bandits (LinUCB, neural)** — Those need feature engineering and thousands of observations. Dial works with 50 observations and zero features. Start with Dial; graduate to contextual bandits when you have the data to justify them.
|
|
82
|
+
|
|
83
|
+
**vs. Bayesian optimization (Optuna, Ax)** — Those optimize over continuous parameter spaces. Dial optimizes over discrete options (strategies, presets, model choices). Different problem shape.
|
|
84
|
+
|
|
85
|
+
### Use cases
|
|
86
|
+
|
|
87
|
+
- **Retrieval weight tuning** — learn the optimal blend of relevance, recency, and importance for RAG systems
|
|
88
|
+
- **Model routing** — discover which LLM performs best for different query types
|
|
89
|
+
- **Prompt selection** — A/B test prompt variants with automatic convergence
|
|
90
|
+
- **Feature flag rollout** — promote variants based on measured outcomes
|
|
91
|
+
- **Any multi-option decision** where you can observe a reward signal
|
|
92
|
+
|
|
93
|
+
## Features
|
|
94
|
+
|
|
95
|
+
- **Beta posteriors** — each arm maintains a `Beta(alpha, beta)` distribution updated with observed rewards
|
|
96
|
+
- **Discounted Thompson Sampling** — optional decay factor for non-stationary environments where the best arm shifts over time
|
|
97
|
+
- **Cost-aware rewards** — built-in `cost_aware_reward()` scales outcomes by resource efficiency
|
|
98
|
+
- **Pluggable storage** — `InMemoryStore` for testing, `SQLiteStore` for persistence, or implement the `ArmStore` protocol for anything else
|
|
99
|
+
- **Zero SQLite dependency in core** — bandit logic talks only to the `ArmStore` protocol
|
|
100
|
+
- **Type-safe** — full annotations, `runtime_checkable` Protocol
|
|
101
|
+
|
|
102
|
+
## Storage backends
|
|
103
|
+
|
|
104
|
+
### In-memory (ephemeral)
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from thompson_bandits import InMemoryStore
|
|
108
|
+
|
|
109
|
+
store = InMemoryStore(arm_ids=["a", "b", "c"], prior_alpha=1.0, prior_beta=1.0)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### SQLite (persistent)
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from thompson_bandits import SQLiteStore
|
|
116
|
+
|
|
117
|
+
# From a file path (store owns the connection)
|
|
118
|
+
store = SQLiteStore.from_path("bandits.db", arm_ids=["a", "b", "c"])
|
|
119
|
+
|
|
120
|
+
# From an existing connection (you own the connection)
|
|
121
|
+
import sqlite3
|
|
122
|
+
conn = sqlite3.connect("bandits.db")
|
|
123
|
+
store = SQLiteStore(conn, arm_ids=["a", "b", "c"])
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Custom storage
|
|
127
|
+
|
|
128
|
+
Implement the `ArmStore` protocol — any class with the right methods works, no inheritance required:
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from thompson_bandits import ArmStore, ArmStats
|
|
132
|
+
|
|
133
|
+
class RedisStore:
|
|
134
|
+
def get_stats(self, arm_id: str) -> ArmStats | None: ...
|
|
135
|
+
def update_stats(self, arm_id: str, alpha_delta: float, beta_delta: float, reward: float) -> None: ...
|
|
136
|
+
def get_all_arms(self) -> list[ArmStats]: ...
|
|
137
|
+
def decay(self, arm_id: str, factor: float) -> None: ...
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Non-stationary environments
|
|
141
|
+
|
|
142
|
+
When the best option changes over time, enable discounting:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from thompson_bandits import ThompsonBandit, InMemoryStore, BanditConfig
|
|
146
|
+
|
|
147
|
+
config = BanditConfig(discount=0.95) # decay factor in (0, 1)
|
|
148
|
+
bandit = ThompsonBandit(store, config=config)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Before each update, existing evidence is decayed by the discount factor. Recent observations carry more weight than old ones.
|
|
152
|
+
|
|
153
|
+
## Cost-aware optimization
|
|
154
|
+
|
|
155
|
+
When options have different costs (tokens, latency, dollars), scale rewards accordingly:
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from thompson_bandits import cost_aware_reward
|
|
159
|
+
|
|
160
|
+
raw_reward = 0.9
|
|
161
|
+
token_cost = 1500
|
|
162
|
+
baseline_cost = 1000
|
|
163
|
+
|
|
164
|
+
adjusted = cost_aware_reward(raw_reward, cost=token_cost, baseline_cost=baseline_cost)
|
|
165
|
+
bandit.update(arm, reward=adjusted)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Inspecting state
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
summary = bandit.get_summary()
|
|
172
|
+
print(summary.best_arm) # 'relevance_heavy'
|
|
173
|
+
print(summary.total_pulls) # 50
|
|
174
|
+
|
|
175
|
+
for arm in summary.arms:
|
|
176
|
+
print(f"{arm.arm_id}: mean={arm.mean:.3f}, pulls={arm.pulls}")
|
|
177
|
+
# balanced: mean=0.577, pulls=11
|
|
178
|
+
# recency_heavy: mean=0.421, pulls=8
|
|
179
|
+
# relevance_heavy: mean=0.890, pulls=31
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Research
|
|
183
|
+
|
|
184
|
+
Dial extracts the Thompson Sampling engine from a published research experiment on gradient-free retrieval weight learning. The experiment ran 1,200 episodes across 4 conditions on a $50/month API budget.
|
|
185
|
+
|
|
186
|
+
<details>
|
|
187
|
+
<summary>Citation (BibTeX)</summary>
|
|
188
|
+
|
|
189
|
+
```bibtex
|
|
190
|
+
@article{dirocco2026gradient,
|
|
191
|
+
title = {Gradient-Free Retrieval Weight Learning via Thompson Sampling
|
|
192
|
+
with LLM Self-Assessment},
|
|
193
|
+
author = {DiRocco, Alfonso},
|
|
194
|
+
year = {2026},
|
|
195
|
+
url = {https://github.com/kusp-dev/retrieval-weight-experiment},
|
|
196
|
+
note = {1,200 episodes, 4 conditions, +41\% NDCG@5 over fixed baselines}
|
|
197
|
+
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
</details>
|
|
201
|
+
|
|
202
|
+
## Development
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
git clone https://github.com/fonz-ai/dial.git
|
|
206
|
+
cd dial
|
|
207
|
+
uv sync --extra dev
|
|
208
|
+
uv run pytest tests/ -v
|
|
209
|
+
uv run ruff check src/ tests/
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## License
|
|
213
|
+
|
|
214
|
+
MIT
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# dial
|
|
2
|
+
|
|
3
|
+
**Online weight optimization via Thompson Sampling.** Learns optimal configurations from outcome feedback — no grid search, no manual tuning. Converges in ~50 observations. [+41% NDCG@5](https://github.com/kusp-dev/retrieval-weight-experiment) over fixed-weight baselines in controlled experiments.
|
|
4
|
+
|
|
5
|
+
[](https://github.com/fonz-ai/dial/actions/workflows/ci.yml)
|
|
6
|
+
[](https://www.python.org/downloads/)
|
|
7
|
+
[](LICENSE)
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install kusp-dial
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from thompson_bandits import ThompsonBandit, InMemoryStore
|
|
17
|
+
|
|
18
|
+
store = InMemoryStore(arm_ids=["relevance_heavy", "balanced", "recency_heavy"])
|
|
19
|
+
bandit = ThompsonBandit(store)
|
|
20
|
+
|
|
21
|
+
# Run the loop: select → observe → update
|
|
22
|
+
for query in queries:
|
|
23
|
+
arm = bandit.select()
|
|
24
|
+
reward = run_query(query, strategy=arm)
|
|
25
|
+
bandit.update(arm, reward=reward)
|
|
26
|
+
|
|
27
|
+
print(bandit.get_summary())
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
After 50 iterations:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
BanditSummary(
|
|
34
|
+
best_arm='relevance_heavy',
|
|
35
|
+
total_pulls=50,
|
|
36
|
+
arms=[
|
|
37
|
+
ArmSummary(arm_id='balanced', mean=0.5765, pulls=11),
|
|
38
|
+
ArmSummary(arm_id='recency_heavy', mean=0.4210, pulls=8),
|
|
39
|
+
ArmSummary(arm_id='relevance_heavy', mean=0.8903, pulls=31),
|
|
40
|
+
]
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
The bandit explores all three options early, then converges — 31 of 50 pulls on the winner, without you telling it which arm is best.
|
|
45
|
+
|
|
46
|
+
## Why Dial?
|
|
47
|
+
|
|
48
|
+
**vs. grid search / random search** — Those require running every combination upfront. Dial learns online, one observation at a time. No batch experiments needed.
|
|
49
|
+
|
|
50
|
+
**vs. manual tuning** — Manual weights are a guess that stays frozen. Dial adapts when the best option shifts — user behavior drifts, data distributions change, what worked in January fails in March.
|
|
51
|
+
|
|
52
|
+
**vs. contextual bandits (LinUCB, neural)** — Those need feature engineering and thousands of observations. Dial works with 50 observations and zero features. Start with Dial; graduate to contextual bandits when you have the data to justify them.
|
|
53
|
+
|
|
54
|
+
**vs. Bayesian optimization (Optuna, Ax)** — Those optimize over continuous parameter spaces. Dial optimizes over discrete options (strategies, presets, model choices). Different problem shape.
|
|
55
|
+
|
|
56
|
+
### Use cases
|
|
57
|
+
|
|
58
|
+
- **Retrieval weight tuning** — learn the optimal blend of relevance, recency, and importance for RAG systems
|
|
59
|
+
- **Model routing** — discover which LLM performs best for different query types
|
|
60
|
+
- **Prompt selection** — A/B test prompt variants with automatic convergence
|
|
61
|
+
- **Feature flag rollout** — promote variants based on measured outcomes
|
|
62
|
+
- **Any multi-option decision** where you can observe a reward signal
|
|
63
|
+
|
|
64
|
+
## Features
|
|
65
|
+
|
|
66
|
+
- **Beta posteriors** — each arm maintains a `Beta(alpha, beta)` distribution updated with observed rewards
|
|
67
|
+
- **Discounted Thompson Sampling** — optional decay factor for non-stationary environments where the best arm shifts over time
|
|
68
|
+
- **Cost-aware rewards** — built-in `cost_aware_reward()` scales outcomes by resource efficiency
|
|
69
|
+
- **Pluggable storage** — `InMemoryStore` for testing, `SQLiteStore` for persistence, or implement the `ArmStore` protocol for anything else
|
|
70
|
+
- **Zero SQLite dependency in core** — bandit logic talks only to the `ArmStore` protocol
|
|
71
|
+
- **Type-safe** — full annotations, `runtime_checkable` Protocol
|
|
72
|
+
|
|
73
|
+
## Storage backends
|
|
74
|
+
|
|
75
|
+
### In-memory (ephemeral)
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from thompson_bandits import InMemoryStore
|
|
79
|
+
|
|
80
|
+
store = InMemoryStore(arm_ids=["a", "b", "c"], prior_alpha=1.0, prior_beta=1.0)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### SQLite (persistent)
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from thompson_bandits import SQLiteStore
|
|
87
|
+
|
|
88
|
+
# From a file path (store owns the connection)
|
|
89
|
+
store = SQLiteStore.from_path("bandits.db", arm_ids=["a", "b", "c"])
|
|
90
|
+
|
|
91
|
+
# From an existing connection (you own the connection)
|
|
92
|
+
import sqlite3
|
|
93
|
+
conn = sqlite3.connect("bandits.db")
|
|
94
|
+
store = SQLiteStore(conn, arm_ids=["a", "b", "c"])
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Custom storage
|
|
98
|
+
|
|
99
|
+
Implement the `ArmStore` protocol — any class with the right methods works, no inheritance required:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from thompson_bandits import ArmStore, ArmStats
|
|
103
|
+
|
|
104
|
+
class RedisStore:
|
|
105
|
+
def get_stats(self, arm_id: str) -> ArmStats | None: ...
|
|
106
|
+
def update_stats(self, arm_id: str, alpha_delta: float, beta_delta: float, reward: float) -> None: ...
|
|
107
|
+
def get_all_arms(self) -> list[ArmStats]: ...
|
|
108
|
+
def decay(self, arm_id: str, factor: float) -> None: ...
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Non-stationary environments
|
|
112
|
+
|
|
113
|
+
When the best option changes over time, enable discounting:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from thompson_bandits import ThompsonBandit, InMemoryStore, BanditConfig
|
|
117
|
+
|
|
118
|
+
config = BanditConfig(discount=0.95) # decay factor in (0, 1)
|
|
119
|
+
bandit = ThompsonBandit(store, config=config)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Before each update, existing evidence is decayed by the discount factor. Recent observations carry more weight than old ones.
|
|
123
|
+
|
|
124
|
+
## Cost-aware optimization
|
|
125
|
+
|
|
126
|
+
When options have different costs (tokens, latency, dollars), scale rewards accordingly:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
from thompson_bandits import cost_aware_reward
|
|
130
|
+
|
|
131
|
+
raw_reward = 0.9
|
|
132
|
+
token_cost = 1500
|
|
133
|
+
baseline_cost = 1000
|
|
134
|
+
|
|
135
|
+
adjusted = cost_aware_reward(raw_reward, cost=token_cost, baseline_cost=baseline_cost)
|
|
136
|
+
bandit.update(arm, reward=adjusted)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Inspecting state
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
summary = bandit.get_summary()
|
|
143
|
+
print(summary.best_arm) # 'relevance_heavy'
|
|
144
|
+
print(summary.total_pulls) # 50
|
|
145
|
+
|
|
146
|
+
for arm in summary.arms:
|
|
147
|
+
print(f"{arm.arm_id}: mean={arm.mean:.3f}, pulls={arm.pulls}")
|
|
148
|
+
# balanced: mean=0.577, pulls=11
|
|
149
|
+
# recency_heavy: mean=0.421, pulls=8
|
|
150
|
+
# relevance_heavy: mean=0.890, pulls=31
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Research
|
|
154
|
+
|
|
155
|
+
Dial extracts the Thompson Sampling engine from a published research experiment on gradient-free retrieval weight learning. The experiment ran 1,200 episodes across 4 conditions on a $50/month API budget.
|
|
156
|
+
|
|
157
|
+
<details>
|
|
158
|
+
<summary>Citation (BibTeX)</summary>
|
|
159
|
+
|
|
160
|
+
```bibtex
|
|
161
|
+
@article{dirocco2026gradient,
|
|
162
|
+
title = {Gradient-Free Retrieval Weight Learning via Thompson Sampling
|
|
163
|
+
with LLM Self-Assessment},
|
|
164
|
+
author = {DiRocco, Alfonso},
|
|
165
|
+
year = {2026},
|
|
166
|
+
url = {https://github.com/kusp-dev/retrieval-weight-experiment},
|
|
167
|
+
note = {1,200 episodes, 4 conditions, +41\% NDCG@5 over fixed baselines}
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
</details>
|
|
172
|
+
|
|
173
|
+
## Development
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
git clone https://github.com/fonz-ai/dial.git
|
|
177
|
+
cd dial
|
|
178
|
+
uv sync --extra dev
|
|
179
|
+
uv run pytest tests/ -v
|
|
180
|
+
uv run ruff check src/ tests/
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## License
|
|
184
|
+
|
|
185
|
+
MIT
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "kusp-dial"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Online weight optimization via Thompson Sampling — learns optimal configurations from outcome feedback."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Alfonso DiRocco", email = "alfonso@kusp.dev" },
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"thompson-sampling",
|
|
17
|
+
"bandits",
|
|
18
|
+
"multi-armed-bandits",
|
|
19
|
+
"optimization",
|
|
20
|
+
"online-learning",
|
|
21
|
+
"retrieval",
|
|
22
|
+
"reinforcement-learning",
|
|
23
|
+
]
|
|
24
|
+
classifiers = [
|
|
25
|
+
"Development Status :: 4 - Beta",
|
|
26
|
+
"Intended Audience :: Developers",
|
|
27
|
+
"Intended Audience :: Science/Research",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.11",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Programming Language :: Python :: 3.13",
|
|
32
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
33
|
+
"Typing :: Typed",
|
|
34
|
+
]
|
|
35
|
+
dependencies = [
|
|
36
|
+
"numpy>=1.24",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://github.com/fonz-ai/dial"
|
|
41
|
+
Repository = "https://github.com/fonz-ai/dial"
|
|
42
|
+
Issues = "https://github.com/fonz-ai/dial/issues"
|
|
43
|
+
|
|
44
|
+
[project.optional-dependencies]
|
|
45
|
+
dev = [
|
|
46
|
+
"pytest>=7.0",
|
|
47
|
+
"pytest-cov>=4.0",
|
|
48
|
+
"ruff>=0.4.0",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[tool.setuptools.packages.find]
|
|
52
|
+
where = ["src"]
|
|
53
|
+
|
|
54
|
+
[tool.pytest.ini_options]
|
|
55
|
+
testpaths = ["tests"]
|
|
56
|
+
|
|
57
|
+
[tool.ruff]
|
|
58
|
+
target-version = "py311"
|
|
59
|
+
line-length = 100
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kusp-dial
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Online weight optimization via Thompson Sampling — learns optimal configurations from outcome feedback.
|
|
5
|
+
Author-email: Alfonso DiRocco <alfonso@kusp.dev>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/fonz-ai/dial
|
|
8
|
+
Project-URL: Repository, https://github.com/fonz-ai/dial
|
|
9
|
+
Project-URL: Issues, https://github.com/fonz-ai/dial/issues
|
|
10
|
+
Keywords: thompson-sampling,bandits,multi-armed-bandits,optimization,online-learning,retrieval,reinforcement-learning
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# dial
|
|
31
|
+
|
|
32
|
+
**Online weight optimization via Thompson Sampling.** Learns optimal configurations from outcome feedback — no grid search, no manual tuning. Converges in ~50 observations. [+41% NDCG@5](https://github.com/kusp-dev/retrieval-weight-experiment) over fixed-weight baselines in controlled experiments.
|
|
33
|
+
|
|
34
|
+
[](https://github.com/fonz-ai/dial/actions/workflows/ci.yml)
|
|
35
|
+
[](https://www.python.org/downloads/)
|
|
36
|
+
[](LICENSE)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install kusp-dial
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Quick start
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from thompson_bandits import ThompsonBandit, InMemoryStore
|
|
46
|
+
|
|
47
|
+
store = InMemoryStore(arm_ids=["relevance_heavy", "balanced", "recency_heavy"])
|
|
48
|
+
bandit = ThompsonBandit(store)
|
|
49
|
+
|
|
50
|
+
# Run the loop: select → observe → update
|
|
51
|
+
for query in queries:
|
|
52
|
+
arm = bandit.select()
|
|
53
|
+
reward = run_query(query, strategy=arm)
|
|
54
|
+
bandit.update(arm, reward=reward)
|
|
55
|
+
|
|
56
|
+
print(bandit.get_summary())
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
After 50 iterations:
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
BanditSummary(
|
|
63
|
+
best_arm='relevance_heavy',
|
|
64
|
+
total_pulls=50,
|
|
65
|
+
arms=[
|
|
66
|
+
ArmSummary(arm_id='balanced', mean=0.5765, pulls=11),
|
|
67
|
+
ArmSummary(arm_id='recency_heavy', mean=0.4210, pulls=8),
|
|
68
|
+
ArmSummary(arm_id='relevance_heavy', mean=0.8903, pulls=31),
|
|
69
|
+
]
|
|
70
|
+
)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The bandit explores all three options early, then converges — 31 of 50 pulls on the winner, without you telling it which arm is best.
|
|
74
|
+
|
|
75
|
+
## Why Dial?
|
|
76
|
+
|
|
77
|
+
**vs. grid search / random search** — Those require running every combination upfront. Dial learns online, one observation at a time. No batch experiments needed.
|
|
78
|
+
|
|
79
|
+
**vs. manual tuning** — Manual weights are a guess that stays frozen. Dial adapts when the best option shifts — user behavior drifts, data distributions change, what worked in January fails in March.
|
|
80
|
+
|
|
81
|
+
**vs. contextual bandits (LinUCB, neural)** — Those need feature engineering and thousands of observations. Dial works with 50 observations and zero features. Start with Dial; graduate to contextual bandits when you have the data to justify them.
|
|
82
|
+
|
|
83
|
+
**vs. Bayesian optimization (Optuna, Ax)** — Those optimize over continuous parameter spaces. Dial optimizes over discrete options (strategies, presets, model choices). Different problem shape.
|
|
84
|
+
|
|
85
|
+
### Use cases
|
|
86
|
+
|
|
87
|
+
- **Retrieval weight tuning** — learn the optimal blend of relevance, recency, and importance for RAG systems
|
|
88
|
+
- **Model routing** — discover which LLM performs best for different query types
|
|
89
|
+
- **Prompt selection** — A/B test prompt variants with automatic convergence
|
|
90
|
+
- **Feature flag rollout** — promote variants based on measured outcomes
|
|
91
|
+
- **Any multi-option decision** where you can observe a reward signal
|
|
92
|
+
|
|
93
|
+
## Features
|
|
94
|
+
|
|
95
|
+
- **Beta posteriors** — each arm maintains a `Beta(alpha, beta)` distribution updated with observed rewards
|
|
96
|
+
- **Discounted Thompson Sampling** — optional decay factor for non-stationary environments where the best arm shifts over time
|
|
97
|
+
- **Cost-aware rewards** — built-in `cost_aware_reward()` scales outcomes by resource efficiency
|
|
98
|
+
- **Pluggable storage** — `InMemoryStore` for testing, `SQLiteStore` for persistence, or implement the `ArmStore` protocol for anything else
|
|
99
|
+
- **Zero SQLite dependency in core** — bandit logic talks only to the `ArmStore` protocol
|
|
100
|
+
- **Type-safe** — full annotations, `runtime_checkable` Protocol
|
|
101
|
+
|
|
102
|
+
## Storage backends
|
|
103
|
+
|
|
104
|
+
### In-memory (ephemeral)
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from thompson_bandits import InMemoryStore
|
|
108
|
+
|
|
109
|
+
store = InMemoryStore(arm_ids=["a", "b", "c"], prior_alpha=1.0, prior_beta=1.0)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### SQLite (persistent)
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from thompson_bandits import SQLiteStore
|
|
116
|
+
|
|
117
|
+
# From a file path (store owns the connection)
|
|
118
|
+
store = SQLiteStore.from_path("bandits.db", arm_ids=["a", "b", "c"])
|
|
119
|
+
|
|
120
|
+
# From an existing connection (you own the connection)
|
|
121
|
+
import sqlite3
|
|
122
|
+
conn = sqlite3.connect("bandits.db")
|
|
123
|
+
store = SQLiteStore(conn, arm_ids=["a", "b", "c"])
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Custom storage
|
|
127
|
+
|
|
128
|
+
Implement the `ArmStore` protocol — any class with the right methods works, no inheritance required:
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from thompson_bandits import ArmStore, ArmStats
|
|
132
|
+
|
|
133
|
+
class RedisStore:
|
|
134
|
+
def get_stats(self, arm_id: str) -> ArmStats | None: ...
|
|
135
|
+
def update_stats(self, arm_id: str, alpha_delta: float, beta_delta: float, reward: float) -> None: ...
|
|
136
|
+
def get_all_arms(self) -> list[ArmStats]: ...
|
|
137
|
+
def decay(self, arm_id: str, factor: float) -> None: ...
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Non-stationary environments
|
|
141
|
+
|
|
142
|
+
When the best option changes over time, enable discounting:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from thompson_bandits import ThompsonBandit, InMemoryStore, BanditConfig
|
|
146
|
+
|
|
147
|
+
config = BanditConfig(discount=0.95) # decay factor in (0, 1)
|
|
148
|
+
bandit = ThompsonBandit(store, config=config)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Before each update, existing evidence is decayed by the discount factor. Recent observations carry more weight than old ones.
|
|
152
|
+
|
|
153
|
+
## Cost-aware optimization
|
|
154
|
+
|
|
155
|
+
When options have different costs (tokens, latency, dollars), scale rewards accordingly:
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from thompson_bandits import cost_aware_reward
|
|
159
|
+
|
|
160
|
+
raw_reward = 0.9
|
|
161
|
+
token_cost = 1500
|
|
162
|
+
baseline_cost = 1000
|
|
163
|
+
|
|
164
|
+
adjusted = cost_aware_reward(raw_reward, cost=token_cost, baseline_cost=baseline_cost)
|
|
165
|
+
bandit.update(arm, reward=adjusted)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Inspecting state
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
summary = bandit.get_summary()
|
|
172
|
+
print(summary.best_arm) # 'relevance_heavy'
|
|
173
|
+
print(summary.total_pulls) # 50
|
|
174
|
+
|
|
175
|
+
for arm in summary.arms:
|
|
176
|
+
print(f"{arm.arm_id}: mean={arm.mean:.3f}, pulls={arm.pulls}")
|
|
177
|
+
# balanced: mean=0.577, pulls=11
|
|
178
|
+
# recency_heavy: mean=0.421, pulls=8
|
|
179
|
+
# relevance_heavy: mean=0.890, pulls=31
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Research
|
|
183
|
+
|
|
184
|
+
Dial extracts the Thompson Sampling engine from a published research experiment on gradient-free retrieval weight learning. The experiment ran 1,200 episodes across 4 conditions on a $50/month API budget.
|
|
185
|
+
|
|
186
|
+
<details>
|
|
187
|
+
<summary>Citation (BibTeX)</summary>
|
|
188
|
+
|
|
189
|
+
```bibtex
|
|
190
|
+
@article{dirocco2026gradient,
|
|
191
|
+
title = {Gradient-Free Retrieval Weight Learning via Thompson Sampling
|
|
192
|
+
with LLM Self-Assessment},
|
|
193
|
+
author = {DiRocco, Alfonso},
|
|
194
|
+
year = {2026},
|
|
195
|
+
url = {https://github.com/kusp-dev/retrieval-weight-experiment},
|
|
196
|
+
note = {1,200 episodes, 4 conditions, +41\% NDCG@5 over fixed baselines}
|
|
197
|
+
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
</details>
|
|
201
|
+
|
|
202
|
+
## Development
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
git clone https://github.com/fonz-ai/dial.git
|
|
206
|
+
cd dial
|
|
207
|
+
uv sync --extra dev
|
|
208
|
+
uv run pytest tests/ -v
|
|
209
|
+
uv run ruff check src/ tests/
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## License
|
|
213
|
+
|
|
214
|
+
MIT
|