noregret 0.0.0.dev3__tar.gz → 0.0.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noregret-0.0.0.dev4/PKG-INFO +347 -0
- noregret-0.0.0.dev4/README.rst +297 -0
- noregret-0.0.0.dev4/noregret/__init__.py +188 -0
- noregret-0.0.0.dev4/noregret/games/__init__.py +57 -0
- noregret-0.0.0.dev4/noregret/games/extensive_form/__init__.py +12 -0
- noregret-0.0.0.dev4/noregret/games/extensive_form/games.py +145 -0
- noregret-0.0.0.dev4/noregret/games/games.py +265 -0
- noregret-0.0.0.dev4/noregret/games/multilinear.py +160 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/__init__.py +34 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/assurance-game.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/battle-of-the-sexes.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/chicken.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/games.py +146 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/gift-exchange-game.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/matching-pennies.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/prisoners-dilemma.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/pure-coordination.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/rock-paper-scissors-plus.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/rock-paper-scissors.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/rock-paper-superscissors.json +1 -0
- noregret-0.0.0.dev4/noregret/games/normal_form/stag-hunt.json +1 -0
- noregret-0.0.0.dev4/noregret/games/utilities.py +141 -0
- noregret-0.0.0.dev4/noregret/kernels.py +163 -0
- noregret-0.0.0.dev4/noregret/regret_minimizers/__init__.py +46 -0
- noregret-0.0.0.dev4/noregret/regret_minimizers/probability_simplices.py +298 -0
- noregret-0.0.0.dev4/noregret/regret_minimizers/regret_minimizers.py +129 -0
- noregret-0.0.0.dev4/noregret/regret_minimizers/sequence_form_polytopes.py +197 -0
- noregret-0.0.0.dev4/noregret/sequence_form_polytopes.py +340 -0
- noregret-0.0.0.dev4/noregret/solvers/__init__.py +12 -0
- noregret-0.0.0.dev4/noregret/solvers/linear_programming.py +92 -0
- noregret-0.0.0.dev4/noregret/solvers/regret_minimization.py +153 -0
- noregret-0.0.0.dev4/noregret/tests/__init__.py +1 -0
- noregret-0.0.0.dev4/noregret/tests/test_games.py +62 -0
- noregret-0.0.0.dev4/noregret/tests/test_linear_programming.py +36 -0
- noregret-0.0.0.dev4/noregret/tests/test_regret_minimization.py +154 -0
- noregret-0.0.0.dev4/noregret/tests/test_sequence_form_polytopes.py +157 -0
- noregret-0.0.0.dev4/noregret/utilities.py +36 -0
- noregret-0.0.0.dev4/noregret.egg-info/PKG-INFO +347 -0
- noregret-0.0.0.dev4/noregret.egg-info/SOURCES.txt +43 -0
- noregret-0.0.0.dev4/noregret.egg-info/requires.txt +8 -0
- {noregret-0.0.0.dev3 → noregret-0.0.0.dev4}/setup.py +12 -6
- noregret-0.0.0.dev3/PKG-INFO +0 -106
- noregret-0.0.0.dev3/README.md +0 -61
- noregret-0.0.0.dev3/noregret/__init__.py +0 -0
- noregret-0.0.0.dev3/noregret/games.py +0 -788
- noregret-0.0.0.dev3/noregret/regret_minimizers.py +0 -810
- noregret-0.0.0.dev3/noregret/utilities.py +0 -310
- noregret-0.0.0.dev3/noregret.egg-info/PKG-INFO +0 -106
- noregret-0.0.0.dev3/noregret.egg-info/SOURCES.txt +0 -12
- noregret-0.0.0.dev3/noregret.egg-info/requires.txt +0 -3
- {noregret-0.0.0.dev3 → noregret-0.0.0.dev4}/LICENSE +0 -0
- {noregret-0.0.0.dev3 → noregret-0.0.0.dev4}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev3 → noregret-0.0.0.dev4}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev3 → noregret-0.0.0.dev4}/setup.cfg +0 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: noregret
|
|
3
|
+
Version: 0.0.0.dev4
|
|
4
|
+
Summary: No-regret learning dynamics
|
|
5
|
+
Home-page: https://github.com/uoftcprg/noregret
|
|
6
|
+
Author: Universal, Open, Free, and Transparent Computer Poker Research Group
|
|
7
|
+
Author-email: juhok@cs.cmu.edu
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Documentation, https://noregret.readthedocs.io/en/latest/
|
|
10
|
+
Project-URL: Source, https://github.com/uoftcprg/noregret
|
|
11
|
+
Project-URL: Tracker, https://github.com/uoftcprg/noregret/issues
|
|
12
|
+
Keywords: artificial-intelligence,game,game-theory,imperfect-information-game,online-learning,python
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Education
|
|
15
|
+
Classifier: Topic :: Education
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
19
|
+
Classifier: Operating System :: OS Independent
|
|
20
|
+
Classifier: Programming Language :: Python
|
|
21
|
+
Classifier: Programming Language :: Python :: 3
|
|
22
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
26
|
+
Requires-Python: >=3.12
|
|
27
|
+
Description-Content-Type: text/x-rst
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: cupy-cuda13x[ctk]<15,>=14.0.1
|
|
30
|
+
Requires-Dist: gurobipy<14,~=13.0.2
|
|
31
|
+
Requires-Dist: numpy<3,>=2.4.4
|
|
32
|
+
Requires-Dist: open-spiel<2,>=1.6.14
|
|
33
|
+
Requires-Dist: ordered-set<5,>=4.1.0
|
|
34
|
+
Requires-Dist: orjson<4,>=3.11.9
|
|
35
|
+
Requires-Dist: scipy<2,>=1.17.1
|
|
36
|
+
Requires-Dist: tqdm<5,>=4.67.3
|
|
37
|
+
Dynamic: author
|
|
38
|
+
Dynamic: author-email
|
|
39
|
+
Dynamic: classifier
|
|
40
|
+
Dynamic: description
|
|
41
|
+
Dynamic: description-content-type
|
|
42
|
+
Dynamic: home-page
|
|
43
|
+
Dynamic: keywords
|
|
44
|
+
Dynamic: license
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
Dynamic: project-url
|
|
47
|
+
Dynamic: requires-dist
|
|
48
|
+
Dynamic: requires-python
|
|
49
|
+
Dynamic: summary
|
|
50
|
+
|
|
51
|
+
========
|
|
52
|
+
NoRegret
|
|
53
|
+
========
|
|
54
|
+
|
|
55
|
+
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving 91% code coverage.
|
|
56
|
+
|
|
57
|
+
Features
|
|
58
|
+
--------
|
|
59
|
+
|
|
60
|
+
* Extensive array of regret minimizers and game solvers.
|
|
61
|
+
* High-speed implementations.
|
|
62
|
+
* GPU acceleration.
|
|
63
|
+
|
|
64
|
+
Installation
|
|
65
|
+
------------
|
|
66
|
+
|
|
67
|
+
The NoRegret library requires Python Version 3.12 or above and can be installed using pip:
|
|
68
|
+
|
|
69
|
+
.. code-block:: bash
|
|
70
|
+
|
|
71
|
+
pip install noregret
|
|
72
|
+
|
|
73
|
+
Usages
|
|
74
|
+
------
|
|
75
|
+
|
|
76
|
+
Example usages of NoRegret are shown below.
|
|
77
|
+
|
|
78
|
+
Solving Games via Regret Minimization
|
|
79
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
80
|
+
|
|
81
|
+
The code snippet below demonstrates how one can solve games via regret minimization using NoRegret.
|
|
82
|
+
|
|
83
|
+
.. code-block:: python
|
|
84
|
+
|
|
85
|
+
from functools import partial
|
|
86
|
+
from math import inf
|
|
87
|
+
|
|
88
|
+
from tqdm import tqdm
|
|
89
|
+
import matplotlib.pyplot as plt
|
|
90
|
+
import noregret as nr
|
|
91
|
+
import pandas as pd
|
|
92
|
+
import seaborn as sns
|
|
93
|
+
|
|
94
|
+
KERNEL = nr.FloatingPointKernel()
|
|
95
|
+
GAMES = {
|
|
96
|
+
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
97
|
+
'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
|
|
98
|
+
'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
|
|
99
|
+
}
|
|
100
|
+
PARAMETERS = {
|
|
101
|
+
'CFR': (nr.CFR, False, False),
|
|
102
|
+
'CFR+': (nr.CFR_plus, True, False),
|
|
103
|
+
'DCFR': (nr.DCFR, True, False),
|
|
104
|
+
'PCFR+': (partial(nr.CFR_plus, gamma=2), True, True),
|
|
105
|
+
'PCFR+*': (partial(nr.CFR_plus, gamma=inf), True, True),
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def main():
|
|
110
|
+
for name, game in tqdm(GAMES.items()):
|
|
111
|
+
iterations = []
|
|
112
|
+
exploitabilities = []
|
|
113
|
+
expected_utilities = []
|
|
114
|
+
variants = []
|
|
115
|
+
|
|
116
|
+
for variant, (R_type, alt, pred) in tqdm(
|
|
117
|
+
PARAMETERS.items(),
|
|
118
|
+
leave=False,
|
|
119
|
+
):
|
|
120
|
+
R_row = R_type(KERNEL, game.row_sequence_form_polytope)
|
|
121
|
+
R_col = R_type(KERNEL, game.column_sequence_form_polytope)
|
|
122
|
+
|
|
123
|
+
def update():
|
|
124
|
+
t = R_row.iteration_count
|
|
125
|
+
x_bar = R_row.average_strategy
|
|
126
|
+
y_bar = R_col.average_strategy
|
|
127
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
128
|
+
u = game.expected_row_utility(x_bar, y_bar)
|
|
129
|
+
|
|
130
|
+
iterations.append(t)
|
|
131
|
+
exploitabilities.append(epsilon)
|
|
132
|
+
expected_utilities.append(u)
|
|
133
|
+
variants.append(variant)
|
|
134
|
+
|
|
135
|
+
nr.regret_minimization(
|
|
136
|
+
game,
|
|
137
|
+
R_row,
|
|
138
|
+
R_col,
|
|
139
|
+
alternation=alt,
|
|
140
|
+
prediction=pred,
|
|
141
|
+
update=update,
|
|
142
|
+
progress_bar={'leave': False},
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
data = {
|
|
146
|
+
'Iteration': iterations,
|
|
147
|
+
'Exploitability': exploitabilities,
|
|
148
|
+
'Expected utility': expected_utilities,
|
|
149
|
+
'Variant': variants,
|
|
150
|
+
}
|
|
151
|
+
df = pd.DataFrame(data)
|
|
152
|
+
|
|
153
|
+
plt.clf()
|
|
154
|
+
sns.lineplot(df, x='Iteration', y='Exploitability', hue='Variant')
|
|
155
|
+
plt.xscale('log')
|
|
156
|
+
plt.yscale('log')
|
|
157
|
+
plt.title(f'Exploitability in {name}')
|
|
158
|
+
plt.show()
|
|
159
|
+
|
|
160
|
+
plt.clf()
|
|
161
|
+
sns.lineplot(df, x='Iteration', y='Expected utility', hue='Variant')
|
|
162
|
+
plt.xscale('log')
|
|
163
|
+
plt.title(f'Expected utility in {name}')
|
|
164
|
+
plt.show()
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == '__main__':
|
|
168
|
+
main()
|
|
169
|
+
|
|
170
|
+
GPU-Accelerated Game Solving
|
|
171
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
172
|
+
|
|
173
|
+
The code snippet below demonstrates how one can solve games while leveraging GPU acceleration.
|
|
174
|
+
|
|
175
|
+
.. code-block:: python
|
|
176
|
+
|
|
177
|
+
from sys import stdout
|
|
178
|
+
|
|
179
|
+
from orjson import dumps, OPT_SERIALIZE_NUMPY
|
|
180
|
+
import noregret as nr
|
|
181
|
+
|
|
182
|
+
KERNEL = nr.CUDAKernel()
|
|
183
|
+
GAME = nr.from_open_spiel(KERNEL, 'liars_dice')
|
|
184
|
+
PARAMETERS = nr.CFR, True, False
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def main():
|
|
188
|
+
R_type, alt, pred = PARAMETERS
|
|
189
|
+
R_row = R_type(KERNEL, GAME.row_sequence_form_polytope)
|
|
190
|
+
R_col = R_type(KERNEL, GAME.column_sequence_form_polytope)
|
|
191
|
+
x_bar, y_bar = nr.regret_minimization(
|
|
192
|
+
GAME,
|
|
193
|
+
R_row,
|
|
194
|
+
R_col,
|
|
195
|
+
alternation=alt,
|
|
196
|
+
prediction=pred,
|
|
197
|
+
)
|
|
198
|
+
data = {
|
|
199
|
+
'x_bar': KERNEL.numpy.asnumpy(x_bar),
|
|
200
|
+
'y_bar': KERNEL.numpy.asnumpy(y_bar),
|
|
201
|
+
'Exploitability': GAME.exploitability(x_bar, y_bar).item(),
|
|
202
|
+
'Expected utility': GAME.expected_row_utility(x_bar, y_bar).item(),
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
stdout.buffer.write(dumps(data, option=OPT_SERIALIZE_NUMPY))
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
if __name__ == '__main__':
|
|
209
|
+
main()
|
|
210
|
+
|
|
211
|
+
Solving Games via Linear Programming
|
|
212
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
213
|
+
|
|
214
|
+
The code snippet below demonstrates how one can solve games via linear programming using NoRegret.
|
|
215
|
+
|
|
216
|
+
.. code-block:: python
|
|
217
|
+
|
|
218
|
+
import noregret as nr
|
|
219
|
+
|
|
220
|
+
KERNEL = nr.FloatingPointKernel()
|
|
221
|
+
GAMES = {
|
|
222
|
+
'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
|
|
223
|
+
'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
|
|
224
|
+
'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def main():
|
|
229
|
+
for name, game in GAMES.items():
|
|
230
|
+
x, y = nr.linear_programming(game)
|
|
231
|
+
v = game.expected_row_utility(x, y)
|
|
232
|
+
|
|
233
|
+
print(f'{name}:', v)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
if __name__ == '__main__':
|
|
237
|
+
main()
|
|
238
|
+
|
|
239
|
+
Conduct Research in Online Convex Optimization
|
|
240
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
241
|
+
|
|
242
|
+
The code snippet below reproduces results from Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
|
|
243
|
+
|
|
244
|
+
.. code-block:: python
|
|
245
|
+
|
|
246
|
+
from functools import partial
|
|
247
|
+
|
|
248
|
+
import matplotlib.pyplot as plt
|
|
249
|
+
import noregret as nr
|
|
250
|
+
|
|
251
|
+
KERNEL = nr.FloatingPointKernel()
|
|
252
|
+
GAME = nr.RockPaperScissorsPlus(KERNEL)
|
|
253
|
+
R_type = partial(nr.MWU, learning_rate=1e-3)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def main():
|
|
257
|
+
RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
|
|
258
|
+
BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
|
|
259
|
+
|
|
260
|
+
nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
|
|
261
|
+
nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
|
|
262
|
+
x, _ = nr.linear_programming(GAME)
|
|
263
|
+
|
|
264
|
+
strategies = KERNEL.numpy.array(RM.strategies)
|
|
265
|
+
|
|
266
|
+
plt.clf()
|
|
267
|
+
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
268
|
+
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
269
|
+
plt.plot(*x[:2], 'ro')
|
|
270
|
+
plt.xlabel('Probability of action 1')
|
|
271
|
+
plt.ylabel('Probability of action 2')
|
|
272
|
+
plt.title('No-external regret dynamics')
|
|
273
|
+
plt.show()
|
|
274
|
+
|
|
275
|
+
strategies = KERNEL.numpy.array(BM_RM.strategies)
|
|
276
|
+
|
|
277
|
+
plt.clf()
|
|
278
|
+
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
279
|
+
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
280
|
+
plt.plot(*x[:2], 'ro')
|
|
281
|
+
plt.xlabel('Probability of action 1')
|
|
282
|
+
plt.ylabel('Probability of action 2')
|
|
283
|
+
plt.title('No-swap regret dynamics')
|
|
284
|
+
plt.show()
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
if __name__ == '__main__':
|
|
288
|
+
main()
|
|
289
|
+
|
|
290
|
+
Testing and Validation
|
|
291
|
+
----------------------
|
|
292
|
+
|
|
293
|
+
Run style checks.
|
|
294
|
+
|
|
295
|
+
.. code-block:: bash
|
|
296
|
+
|
|
297
|
+
flake8 examples noregret
|
|
298
|
+
|
|
299
|
+
Run doctests.
|
|
300
|
+
|
|
301
|
+
.. code-block:: bash
|
|
302
|
+
|
|
303
|
+
shopt -s globstar
|
|
304
|
+
python -m doctest noregret/**/*.py
|
|
305
|
+
|
|
306
|
+
Run unit tests.
|
|
307
|
+
|
|
308
|
+
.. code-block:: bash
|
|
309
|
+
|
|
310
|
+
python -m unittest
|
|
311
|
+
|
|
312
|
+
Check coverage.
|
|
313
|
+
|
|
314
|
+
.. code-block:: bash
|
|
315
|
+
|
|
316
|
+
shopt -s globstar
|
|
317
|
+
coverage run -m doctest noregret/**/*.py
|
|
318
|
+
coverage run -a -m unittest
|
|
319
|
+
coverage report -m
|
|
320
|
+
coverage html
|
|
321
|
+
|
|
322
|
+
Contributing
|
|
323
|
+
------------
|
|
324
|
+
|
|
325
|
+
Contributions are welcome! Please read our Contributing Guide for more information.
|
|
326
|
+
|
|
327
|
+
License
|
|
328
|
+
-------
|
|
329
|
+
|
|
330
|
+
NoRegret is distributed under the MIT license.
|
|
331
|
+
|
|
332
|
+
Citing
|
|
333
|
+
------
|
|
334
|
+
|
|
335
|
+
If you use NoRegret in your research, please cite our library:
|
|
336
|
+
|
|
337
|
+
.. code-block:: bibtex
|
|
338
|
+
|
|
339
|
+
@misc{kim2026parallelizingcounterfactualregretminimization,
|
|
340
|
+
title={Parallelizing Counterfactual Regret Minimization},
|
|
341
|
+
author={Juho Kim and Tuomas Sandholm},
|
|
342
|
+
year={2026},
|
|
343
|
+
eprint={2605.14277},
|
|
344
|
+
archivePrefix={arXiv},
|
|
345
|
+
primaryClass={cs.AI},
|
|
346
|
+
url={https://arxiv.org/abs/2605.14277},
|
|
347
|
+
}
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
========
|
|
2
|
+
NoRegret
|
|
3
|
+
========
|
|
4
|
+
|
|
5
|
+
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving 91% code coverage.
|
|
6
|
+
|
|
7
|
+
Features
|
|
8
|
+
--------
|
|
9
|
+
|
|
10
|
+
* Extensive array of regret minimizers and game solvers.
|
|
11
|
+
* High-speed implementations.
|
|
12
|
+
* GPU acceleration.
|
|
13
|
+
|
|
14
|
+
Installation
|
|
15
|
+
------------
|
|
16
|
+
|
|
17
|
+
The NoRegret library requires Python Version 3.12 or above and can be installed using pip:
|
|
18
|
+
|
|
19
|
+
.. code-block:: bash
|
|
20
|
+
|
|
21
|
+
pip install noregret
|
|
22
|
+
|
|
23
|
+
Usages
|
|
24
|
+
------
|
|
25
|
+
|
|
26
|
+
Example usages of NoRegret are shown below.
|
|
27
|
+
|
|
28
|
+
Solving Games via Regret Minimization
|
|
29
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
30
|
+
|
|
31
|
+
The code snippet below demonstrates how one can solve games via regret minimization using NoRegret.
|
|
32
|
+
|
|
33
|
+
.. code-block:: python
|
|
34
|
+
|
|
35
|
+
from functools import partial
|
|
36
|
+
from math import inf
|
|
37
|
+
|
|
38
|
+
from tqdm import tqdm
|
|
39
|
+
import matplotlib.pyplot as plt
|
|
40
|
+
import noregret as nr
|
|
41
|
+
import pandas as pd
|
|
42
|
+
import seaborn as sns
|
|
43
|
+
|
|
44
|
+
KERNEL = nr.FloatingPointKernel()
|
|
45
|
+
GAMES = {
|
|
46
|
+
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
47
|
+
'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
|
|
48
|
+
'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
|
|
49
|
+
}
|
|
50
|
+
PARAMETERS = {
|
|
51
|
+
'CFR': (nr.CFR, False, False),
|
|
52
|
+
'CFR+': (nr.CFR_plus, True, False),
|
|
53
|
+
'DCFR': (nr.DCFR, True, False),
|
|
54
|
+
'PCFR+': (partial(nr.CFR_plus, gamma=2), True, True),
|
|
55
|
+
'PCFR+*': (partial(nr.CFR_plus, gamma=inf), True, True),
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def main():
|
|
60
|
+
for name, game in tqdm(GAMES.items()):
|
|
61
|
+
iterations = []
|
|
62
|
+
exploitabilities = []
|
|
63
|
+
expected_utilities = []
|
|
64
|
+
variants = []
|
|
65
|
+
|
|
66
|
+
for variant, (R_type, alt, pred) in tqdm(
|
|
67
|
+
PARAMETERS.items(),
|
|
68
|
+
leave=False,
|
|
69
|
+
):
|
|
70
|
+
R_row = R_type(KERNEL, game.row_sequence_form_polytope)
|
|
71
|
+
R_col = R_type(KERNEL, game.column_sequence_form_polytope)
|
|
72
|
+
|
|
73
|
+
def update():
|
|
74
|
+
t = R_row.iteration_count
|
|
75
|
+
x_bar = R_row.average_strategy
|
|
76
|
+
y_bar = R_col.average_strategy
|
|
77
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
78
|
+
u = game.expected_row_utility(x_bar, y_bar)
|
|
79
|
+
|
|
80
|
+
iterations.append(t)
|
|
81
|
+
exploitabilities.append(epsilon)
|
|
82
|
+
expected_utilities.append(u)
|
|
83
|
+
variants.append(variant)
|
|
84
|
+
|
|
85
|
+
nr.regret_minimization(
|
|
86
|
+
game,
|
|
87
|
+
R_row,
|
|
88
|
+
R_col,
|
|
89
|
+
alternation=alt,
|
|
90
|
+
prediction=pred,
|
|
91
|
+
update=update,
|
|
92
|
+
progress_bar={'leave': False},
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
data = {
|
|
96
|
+
'Iteration': iterations,
|
|
97
|
+
'Exploitability': exploitabilities,
|
|
98
|
+
'Expected utility': expected_utilities,
|
|
99
|
+
'Variant': variants,
|
|
100
|
+
}
|
|
101
|
+
df = pd.DataFrame(data)
|
|
102
|
+
|
|
103
|
+
plt.clf()
|
|
104
|
+
sns.lineplot(df, x='Iteration', y='Exploitability', hue='Variant')
|
|
105
|
+
plt.xscale('log')
|
|
106
|
+
plt.yscale('log')
|
|
107
|
+
plt.title(f'Exploitability in {name}')
|
|
108
|
+
plt.show()
|
|
109
|
+
|
|
110
|
+
plt.clf()
|
|
111
|
+
sns.lineplot(df, x='Iteration', y='Expected utility', hue='Variant')
|
|
112
|
+
plt.xscale('log')
|
|
113
|
+
plt.title(f'Expected utility in {name}')
|
|
114
|
+
plt.show()
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
if __name__ == '__main__':
|
|
118
|
+
main()
|
|
119
|
+
|
|
120
|
+
GPU-Accelerated Game Solving
|
|
121
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
122
|
+
|
|
123
|
+
The code snippet below demonstrates how one can solve games while leveraging GPU acceleration.
|
|
124
|
+
|
|
125
|
+
.. code-block:: python
|
|
126
|
+
|
|
127
|
+
from sys import stdout
|
|
128
|
+
|
|
129
|
+
from orjson import dumps, OPT_SERIALIZE_NUMPY
|
|
130
|
+
import noregret as nr
|
|
131
|
+
|
|
132
|
+
KERNEL = nr.CUDAKernel()
|
|
133
|
+
GAME = nr.from_open_spiel(KERNEL, 'liars_dice')
|
|
134
|
+
PARAMETERS = nr.CFR, True, False
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def main():
|
|
138
|
+
R_type, alt, pred = PARAMETERS
|
|
139
|
+
R_row = R_type(KERNEL, GAME.row_sequence_form_polytope)
|
|
140
|
+
R_col = R_type(KERNEL, GAME.column_sequence_form_polytope)
|
|
141
|
+
x_bar, y_bar = nr.regret_minimization(
|
|
142
|
+
GAME,
|
|
143
|
+
R_row,
|
|
144
|
+
R_col,
|
|
145
|
+
alternation=alt,
|
|
146
|
+
prediction=pred,
|
|
147
|
+
)
|
|
148
|
+
data = {
|
|
149
|
+
'x_bar': KERNEL.numpy.asnumpy(x_bar),
|
|
150
|
+
'y_bar': KERNEL.numpy.asnumpy(y_bar),
|
|
151
|
+
'Exploitability': GAME.exploitability(x_bar, y_bar).item(),
|
|
152
|
+
'Expected utility': GAME.expected_row_utility(x_bar, y_bar).item(),
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
stdout.buffer.write(dumps(data, option=OPT_SERIALIZE_NUMPY))
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
if __name__ == '__main__':
|
|
159
|
+
main()
|
|
160
|
+
|
|
161
|
+
Solving Games via Linear Programming
|
|
162
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
163
|
+
|
|
164
|
+
The code snippet below demonstrates how one can solve games via linear programming using NoRegret.
|
|
165
|
+
|
|
166
|
+
.. code-block:: python
|
|
167
|
+
|
|
168
|
+
import noregret as nr
|
|
169
|
+
|
|
170
|
+
KERNEL = nr.FloatingPointKernel()
|
|
171
|
+
GAMES = {
|
|
172
|
+
'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
|
|
173
|
+
'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
|
|
174
|
+
'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def main():
|
|
179
|
+
for name, game in GAMES.items():
|
|
180
|
+
x, y = nr.linear_programming(game)
|
|
181
|
+
v = game.expected_row_utility(x, y)
|
|
182
|
+
|
|
183
|
+
print(f'{name}:', v)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
if __name__ == '__main__':
|
|
187
|
+
main()
|
|
188
|
+
|
|
189
|
+
Conduct Research in Online Convex Optimization
|
|
190
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
191
|
+
|
|
192
|
+
The code snippet below reproduces results from Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
|
|
193
|
+
|
|
194
|
+
.. code-block:: python
|
|
195
|
+
|
|
196
|
+
from functools import partial
|
|
197
|
+
|
|
198
|
+
import matplotlib.pyplot as plt
|
|
199
|
+
import noregret as nr
|
|
200
|
+
|
|
201
|
+
KERNEL = nr.FloatingPointKernel()
|
|
202
|
+
GAME = nr.RockPaperScissorsPlus(KERNEL)
|
|
203
|
+
R_type = partial(nr.MWU, learning_rate=1e-3)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def main():
|
|
207
|
+
RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
|
|
208
|
+
BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
|
|
209
|
+
|
|
210
|
+
nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
|
|
211
|
+
nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
|
|
212
|
+
x, _ = nr.linear_programming(GAME)
|
|
213
|
+
|
|
214
|
+
strategies = KERNEL.numpy.array(RM.strategies)
|
|
215
|
+
|
|
216
|
+
plt.clf()
|
|
217
|
+
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
218
|
+
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
219
|
+
plt.plot(*x[:2], 'ro')
|
|
220
|
+
plt.xlabel('Probability of action 1')
|
|
221
|
+
plt.ylabel('Probability of action 2')
|
|
222
|
+
plt.title('No-external regret dynamics')
|
|
223
|
+
plt.show()
|
|
224
|
+
|
|
225
|
+
strategies = KERNEL.numpy.array(BM_RM.strategies)
|
|
226
|
+
|
|
227
|
+
plt.clf()
|
|
228
|
+
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
229
|
+
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
230
|
+
plt.plot(*x[:2], 'ro')
|
|
231
|
+
plt.xlabel('Probability of action 1')
|
|
232
|
+
plt.ylabel('Probability of action 2')
|
|
233
|
+
plt.title('No-swap regret dynamics')
|
|
234
|
+
plt.show()
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
if __name__ == '__main__':
|
|
238
|
+
main()
|
|
239
|
+
|
|
240
|
+
Testing and Validation
|
|
241
|
+
----------------------
|
|
242
|
+
|
|
243
|
+
Run style checks.
|
|
244
|
+
|
|
245
|
+
.. code-block:: bash
|
|
246
|
+
|
|
247
|
+
flake8 examples noregret
|
|
248
|
+
|
|
249
|
+
Run doctests.
|
|
250
|
+
|
|
251
|
+
.. code-block:: bash
|
|
252
|
+
|
|
253
|
+
shopt -s globstar
|
|
254
|
+
python -m doctest noregret/**/*.py
|
|
255
|
+
|
|
256
|
+
Run unit tests.
|
|
257
|
+
|
|
258
|
+
.. code-block:: bash
|
|
259
|
+
|
|
260
|
+
python -m unittest
|
|
261
|
+
|
|
262
|
+
Check coverage.
|
|
263
|
+
|
|
264
|
+
.. code-block:: bash
|
|
265
|
+
|
|
266
|
+
shopt -s globstar
|
|
267
|
+
coverage run -m doctest noregret/**/*.py
|
|
268
|
+
coverage run -a -m unittest
|
|
269
|
+
coverage report -m
|
|
270
|
+
coverage html
|
|
271
|
+
|
|
272
|
+
Contributing
|
|
273
|
+
------------
|
|
274
|
+
|
|
275
|
+
Contributions are welcome! Please read our Contributing Guide for more information.
|
|
276
|
+
|
|
277
|
+
License
|
|
278
|
+
-------
|
|
279
|
+
|
|
280
|
+
NoRegret is distributed under the MIT license.
|
|
281
|
+
|
|
282
|
+
Citing
|
|
283
|
+
------
|
|
284
|
+
|
|
285
|
+
If you use NoRegret in your research, please cite our library:
|
|
286
|
+
|
|
287
|
+
.. code-block:: bibtex
|
|
288
|
+
|
|
289
|
+
@misc{kim2026parallelizingcounterfactualregretminimization,
|
|
290
|
+
title={Parallelizing Counterfactual Regret Minimization},
|
|
291
|
+
author={Juho Kim and Tuomas Sandholm},
|
|
292
|
+
year={2026},
|
|
293
|
+
eprint={2605.14277},
|
|
294
|
+
archivePrefix={arXiv},
|
|
295
|
+
primaryClass={cs.AI},
|
|
296
|
+
url={https://arxiv.org/abs/2605.14277},
|
|
297
|
+
}
|