noregret 0.0.0.dev7__tar.gz → 0.0.0.dev9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/PKG-INFO +52 -56
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/README.rst +51 -55
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/__init__.py +27 -2
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/games/__init__.py +8 -1
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/games/black_box.py +91 -17
- noregret-0.0.0.dev7/noregret/games/extensive_form/games.py → noregret-0.0.0.dev9/noregret/games/extensive_form.py +26 -36
- noregret-0.0.0.dev7/noregret/games/normal_form/games.py → noregret-0.0.0.dev9/noregret/games/normal_form.py +2 -2
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/kernels.py +14 -11
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/regret_minimizers/__init__.py +6 -0
- noregret-0.0.0.dev9/noregret/regret_minimizers/stochastic.py +202 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/solvers/__init__.py +2 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/solvers/linear_programming.py +2 -4
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/solvers/regret_minimization.py +83 -22
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/tests/test_games.py +56 -33
- noregret-0.0.0.dev9/noregret/tests/test_linear_programming.py +36 -0
- noregret-0.0.0.dev9/noregret/tests/test_regret_minimization.py +263 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/tests/test_sequence_form_polytopes.py +28 -28
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/utilities.py +14 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret.egg-info/PKG-INFO +52 -56
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret.egg-info/SOURCES.txt +14 -15
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/setup.py +1 -1
- noregret-0.0.0.dev7/noregret/games/extensive_form/__init__.py +0 -14
- noregret-0.0.0.dev7/noregret/games/normal_form/__init__.py +0 -36
- noregret-0.0.0.dev7/noregret/tests/test_linear_programming.py +0 -36
- noregret-0.0.0.dev7/noregret/tests/test_regret_minimization.py +0 -213
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/LICENSE +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/assurance-game.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/battle-of-the-sexes.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/chicken.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/gift-exchange-game.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/matching-pennies.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/prisoners-dilemma.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/pure-coordination.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/rock-paper-scissors-plus.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/rock-paper-scissors.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/rock-paper-superscissors.json +0 -0
- {noregret-0.0.0.dev7/noregret/games/normal_form → noregret-0.0.0.dev9/noregret/games/examples}/stag-hunt.json +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/games/games.py +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/games/multilinear.py +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/regret_minimizers/probability_simplices.py +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/regret_minimizers/regret_minimizers.py +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/regret_minimizers/sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret/tests/__init__.py +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret.egg-info/requires.txt +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev7 → noregret-0.0.0.dev9}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: noregret
|
|
3
|
-
Version: 0.0.0.dev7
|
|
3
|
+
Version: 0.0.0.dev9
|
|
4
4
|
Summary: No-regret learning dynamics
|
|
5
5
|
Home-page: https://github.com/uoftcprg/noregret
|
|
6
6
|
Author: Universal, Open, Free, and Transparent Computer Poker Research Group
|
|
@@ -52,7 +52,7 @@ Dynamic: summary
|
|
|
52
52
|
NoRegret
|
|
53
53
|
========
|
|
54
54
|
|
|
55
|
-
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving
|
|
55
|
+
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving 96% code coverage.
|
|
56
56
|
|
|
57
57
|
Features
|
|
58
58
|
--------
|
|
@@ -84,18 +84,18 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
84
84
|
|
|
85
85
|
from functools import partial
|
|
86
86
|
from math import inf
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
from tqdm import tqdm
|
|
89
89
|
import matplotlib.pyplot as plt
|
|
90
90
|
import noregret as nr
|
|
91
91
|
import pandas as pd
|
|
92
92
|
import seaborn as sns
|
|
93
|
-
|
|
94
|
-
|
|
93
|
+
|
|
94
|
+
KER = nr.FPKer()
|
|
95
95
|
GAMES = {
|
|
96
|
-
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(
|
|
97
|
-
'Kuhn poker': nr.to_efg(
|
|
98
|
-
'Leduc poker': nr.to_efg(
|
|
96
|
+
'Rock paper superscissors': nr.to_efg(KER, nr.RockPaperSuperscissors(KER)),
|
|
97
|
+
'Kuhn poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'kuhn_poker')),
|
|
98
|
+
'Leduc poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'leduc_poker')),
|
|
99
99
|
}
|
|
100
100
|
PARAMETERS = {
|
|
101
101
|
'CFR': (nr.CFR, False, False),
|
|
@@ -104,35 +104,35 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
104
104
|
'PCFR+': (partial(nr.CFR_plus, gamma=2), True, True),
|
|
105
105
|
'PCFR+*': (partial(nr.CFR_plus, gamma=inf), True, True),
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
|
|
108
|
+
|
|
109
109
|
def main():
|
|
110
110
|
for name, game in tqdm(GAMES.items()):
|
|
111
111
|
iterations = []
|
|
112
112
|
exploitabilities = []
|
|
113
113
|
expected_utilities = []
|
|
114
114
|
variants = []
|
|
115
|
-
|
|
115
|
+
|
|
116
116
|
for variant, (R_type, alt, pred) in tqdm(
|
|
117
117
|
PARAMETERS.items(),
|
|
118
118
|
leave=False,
|
|
119
119
|
):
|
|
120
|
-
R_row = R_type(
|
|
121
|
-
R_col = R_type(
|
|
122
|
-
|
|
120
|
+
R_row = R_type(KER, game.row_sequence_form_polytope)
|
|
121
|
+
R_col = R_type(KER, game.column_sequence_form_polytope)
|
|
122
|
+
|
|
123
123
|
def update():
|
|
124
124
|
t = R_row.iteration_count
|
|
125
125
|
x_bar = R_row.average_strategy
|
|
126
126
|
y_bar = R_col.average_strategy
|
|
127
127
|
epsilon = game.exploitability(x_bar, y_bar)
|
|
128
128
|
u = game.expected_row_utility(x_bar, y_bar)
|
|
129
|
-
|
|
129
|
+
|
|
130
130
|
iterations.append(t)
|
|
131
131
|
exploitabilities.append(epsilon)
|
|
132
132
|
expected_utilities.append(u)
|
|
133
133
|
variants.append(variant)
|
|
134
|
-
|
|
135
|
-
nr.
|
|
134
|
+
|
|
135
|
+
nr.rm(
|
|
136
136
|
game,
|
|
137
137
|
R_row,
|
|
138
138
|
R_col,
|
|
@@ -141,7 +141,7 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
141
141
|
update=update,
|
|
142
142
|
progress_bar={'leave': False},
|
|
143
143
|
)
|
|
144
|
-
|
|
144
|
+
|
|
145
145
|
data = {
|
|
146
146
|
'Iteration': iterations,
|
|
147
147
|
'Exploitability': exploitabilities,
|
|
@@ -149,21 +149,21 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
149
149
|
'Variant': variants,
|
|
150
150
|
}
|
|
151
151
|
df = pd.DataFrame(data)
|
|
152
|
-
|
|
152
|
+
|
|
153
153
|
plt.clf()
|
|
154
154
|
sns.lineplot(df, x='Iteration', y='Exploitability', hue='Variant')
|
|
155
155
|
plt.xscale('log')
|
|
156
156
|
plt.yscale('log')
|
|
157
157
|
plt.title(f'Exploitability in {name}')
|
|
158
158
|
plt.show()
|
|
159
|
-
|
|
159
|
+
|
|
160
160
|
plt.clf()
|
|
161
161
|
sns.lineplot(df, x='Iteration', y='Expected utility', hue='Variant')
|
|
162
162
|
plt.xscale('log')
|
|
163
163
|
plt.title(f'Expected utility in {name}')
|
|
164
164
|
plt.show()
|
|
165
|
-
|
|
166
|
-
|
|
165
|
+
|
|
166
|
+
|
|
167
167
|
if __name__ == '__main__':
|
|
168
168
|
main()
|
|
169
169
|
|
|
@@ -173,38 +173,34 @@ GPU-Accelerated Game Solving
|
|
|
173
173
|
The code snippet below demonstrates how one can solve games while leveraging GPU acceleration.
|
|
174
174
|
|
|
175
175
|
.. code-block:: python
|
|
176
|
-
|
|
176
|
+
|
|
177
177
|
from sys import stdout
|
|
178
|
-
|
|
178
|
+
|
|
179
179
|
from orjson import dumps, OPT_SERIALIZE_NUMPY
|
|
180
180
|
import noregret as nr
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
GAME = nr.
|
|
181
|
+
|
|
182
|
+
CPU_KER = nr.FPKer()
|
|
183
|
+
GAME = nr.open_spiel_game(CPU_KER, 'liars_dice')
|
|
184
|
+
GPU_KER = nr.CUDAKer()
|
|
185
|
+
GAME = nr.to_efg(GPU_KER, GAME)
|
|
184
186
|
PARAMETERS = nr.CFR, True, False
|
|
185
|
-
|
|
186
|
-
|
|
187
|
+
|
|
188
|
+
|
|
187
189
|
def main():
|
|
188
190
|
R_type, alt, pred = PARAMETERS
|
|
189
|
-
R_row = R_type(
|
|
190
|
-
R_col = R_type(
|
|
191
|
-
x_bar, y_bar = nr.
|
|
192
|
-
GAME,
|
|
193
|
-
R_row,
|
|
194
|
-
R_col,
|
|
195
|
-
alternation=alt,
|
|
196
|
-
prediction=pred,
|
|
197
|
-
)
|
|
191
|
+
R_row = R_type(GPU_KER, GAME.row_sequence_form_polytope)
|
|
192
|
+
R_col = R_type(GPU_KER, GAME.column_sequence_form_polytope)
|
|
193
|
+
x_bar, y_bar = nr.rm(GAME, R_row, R_col, alternation=alt, prediction=pred)
|
|
198
194
|
data = {
|
|
199
|
-
'x_bar':
|
|
200
|
-
'y_bar':
|
|
195
|
+
'x_bar': GPU_KER.numpy.asnumpy(x_bar),
|
|
196
|
+
'y_bar': GPU_KER.numpy.asnumpy(y_bar),
|
|
201
197
|
'Exploitability': GAME.exploitability(x_bar, y_bar).item(),
|
|
202
198
|
'Expected utility': GAME.expected_row_utility(x_bar, y_bar).item(),
|
|
203
199
|
}
|
|
204
|
-
|
|
200
|
+
|
|
205
201
|
stdout.buffer.write(dumps(data, option=OPT_SERIALIZE_NUMPY))
|
|
206
|
-
|
|
207
|
-
|
|
202
|
+
|
|
203
|
+
|
|
208
204
|
if __name__ == '__main__':
|
|
209
205
|
main()
|
|
210
206
|
|
|
@@ -216,23 +212,23 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
216
212
|
.. code-block:: python
|
|
217
213
|
|
|
218
214
|
import noregret as nr
|
|
219
|
-
|
|
220
|
-
|
|
215
|
+
|
|
216
|
+
KER = nr.FPKer()
|
|
221
217
|
GAMES = {
|
|
222
|
-
'Rock paper superscissors': nr.RockPaperSuperscissors(
|
|
223
|
-
'Kuhn poker': nr.to_efg(
|
|
224
|
-
'Leduc poker': nr.to_efg(
|
|
218
|
+
'Rock paper superscissors': nr.RockPaperSuperscissors(KER),
|
|
219
|
+
'Kuhn poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'kuhn_poker')),
|
|
220
|
+
'Leduc poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'leduc_poker')),
|
|
225
221
|
}
|
|
226
|
-
|
|
227
|
-
|
|
222
|
+
|
|
223
|
+
|
|
228
224
|
def main():
|
|
229
225
|
for name, game in GAMES.items():
|
|
230
|
-
x, y = nr.
|
|
226
|
+
x, y = nr.lp(game)
|
|
231
227
|
v = game.expected_row_utility(x, y)
|
|
232
|
-
|
|
228
|
+
|
|
233
229
|
print(f'{name}:', v)
|
|
234
|
-
|
|
235
|
-
|
|
230
|
+
|
|
231
|
+
|
|
236
232
|
if __name__ == '__main__':
|
|
237
233
|
main()
|
|
238
234
|
|
|
@@ -286,11 +282,11 @@ If you use NoRegret in your research, please cite our library:
|
|
|
286
282
|
.. code-block:: bibtex
|
|
287
283
|
|
|
288
284
|
@misc{kim2026parallelizingcounterfactualregretminimization,
|
|
289
|
-
title={Parallelizing Counterfactual Regret Minimization},
|
|
285
|
+
title={Parallelizing Counterfactual Regret Minimization},
|
|
290
286
|
author={Juho Kim and Tuomas Sandholm},
|
|
291
287
|
year={2026},
|
|
292
288
|
eprint={2605.14277},
|
|
293
289
|
archivePrefix={arXiv},
|
|
294
290
|
primaryClass={cs.AI},
|
|
295
|
-
url={https://arxiv.org/abs/2605.14277},
|
|
291
|
+
url={https://arxiv.org/abs/2605.14277},
|
|
296
292
|
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
NoRegret
|
|
3
3
|
========
|
|
4
4
|
|
|
5
|
-
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving
|
|
5
|
+
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving 96% code coverage.
|
|
6
6
|
|
|
7
7
|
Features
|
|
8
8
|
--------
|
|
@@ -34,18 +34,18 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
34
34
|
|
|
35
35
|
from functools import partial
|
|
36
36
|
from math import inf
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
from tqdm import tqdm
|
|
39
39
|
import matplotlib.pyplot as plt
|
|
40
40
|
import noregret as nr
|
|
41
41
|
import pandas as pd
|
|
42
42
|
import seaborn as sns
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
|
|
44
|
+
KER = nr.FPKer()
|
|
45
45
|
GAMES = {
|
|
46
|
-
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(
|
|
47
|
-
'Kuhn poker': nr.to_efg(
|
|
48
|
-
'Leduc poker': nr.to_efg(
|
|
46
|
+
'Rock paper superscissors': nr.to_efg(KER, nr.RockPaperSuperscissors(KER)),
|
|
47
|
+
'Kuhn poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'kuhn_poker')),
|
|
48
|
+
'Leduc poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'leduc_poker')),
|
|
49
49
|
}
|
|
50
50
|
PARAMETERS = {
|
|
51
51
|
'CFR': (nr.CFR, False, False),
|
|
@@ -54,35 +54,35 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
54
54
|
'PCFR+': (partial(nr.CFR_plus, gamma=2), True, True),
|
|
55
55
|
'PCFR+*': (partial(nr.CFR_plus, gamma=inf), True, True),
|
|
56
56
|
}
|
|
57
|
-
|
|
58
|
-
|
|
57
|
+
|
|
58
|
+
|
|
59
59
|
def main():
|
|
60
60
|
for name, game in tqdm(GAMES.items()):
|
|
61
61
|
iterations = []
|
|
62
62
|
exploitabilities = []
|
|
63
63
|
expected_utilities = []
|
|
64
64
|
variants = []
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
for variant, (R_type, alt, pred) in tqdm(
|
|
67
67
|
PARAMETERS.items(),
|
|
68
68
|
leave=False,
|
|
69
69
|
):
|
|
70
|
-
R_row = R_type(
|
|
71
|
-
R_col = R_type(
|
|
72
|
-
|
|
70
|
+
R_row = R_type(KER, game.row_sequence_form_polytope)
|
|
71
|
+
R_col = R_type(KER, game.column_sequence_form_polytope)
|
|
72
|
+
|
|
73
73
|
def update():
|
|
74
74
|
t = R_row.iteration_count
|
|
75
75
|
x_bar = R_row.average_strategy
|
|
76
76
|
y_bar = R_col.average_strategy
|
|
77
77
|
epsilon = game.exploitability(x_bar, y_bar)
|
|
78
78
|
u = game.expected_row_utility(x_bar, y_bar)
|
|
79
|
-
|
|
79
|
+
|
|
80
80
|
iterations.append(t)
|
|
81
81
|
exploitabilities.append(epsilon)
|
|
82
82
|
expected_utilities.append(u)
|
|
83
83
|
variants.append(variant)
|
|
84
|
-
|
|
85
|
-
nr.
|
|
84
|
+
|
|
85
|
+
nr.rm(
|
|
86
86
|
game,
|
|
87
87
|
R_row,
|
|
88
88
|
R_col,
|
|
@@ -91,7 +91,7 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
91
91
|
update=update,
|
|
92
92
|
progress_bar={'leave': False},
|
|
93
93
|
)
|
|
94
|
-
|
|
94
|
+
|
|
95
95
|
data = {
|
|
96
96
|
'Iteration': iterations,
|
|
97
97
|
'Exploitability': exploitabilities,
|
|
@@ -99,21 +99,21 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
99
99
|
'Variant': variants,
|
|
100
100
|
}
|
|
101
101
|
df = pd.DataFrame(data)
|
|
102
|
-
|
|
102
|
+
|
|
103
103
|
plt.clf()
|
|
104
104
|
sns.lineplot(df, x='Iteration', y='Exploitability', hue='Variant')
|
|
105
105
|
plt.xscale('log')
|
|
106
106
|
plt.yscale('log')
|
|
107
107
|
plt.title(f'Exploitability in {name}')
|
|
108
108
|
plt.show()
|
|
109
|
-
|
|
109
|
+
|
|
110
110
|
plt.clf()
|
|
111
111
|
sns.lineplot(df, x='Iteration', y='Expected utility', hue='Variant')
|
|
112
112
|
plt.xscale('log')
|
|
113
113
|
plt.title(f'Expected utility in {name}')
|
|
114
114
|
plt.show()
|
|
115
|
-
|
|
116
|
-
|
|
115
|
+
|
|
116
|
+
|
|
117
117
|
if __name__ == '__main__':
|
|
118
118
|
main()
|
|
119
119
|
|
|
@@ -123,38 +123,34 @@ GPU-Accelerated Game Solving
|
|
|
123
123
|
The code snippet below demonstrates how one can solve games while leveraging GPU acceleration.
|
|
124
124
|
|
|
125
125
|
.. code-block:: python
|
|
126
|
-
|
|
126
|
+
|
|
127
127
|
from sys import stdout
|
|
128
|
-
|
|
128
|
+
|
|
129
129
|
from orjson import dumps, OPT_SERIALIZE_NUMPY
|
|
130
130
|
import noregret as nr
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
GAME = nr.
|
|
131
|
+
|
|
132
|
+
CPU_KER = nr.FPKer()
|
|
133
|
+
GAME = nr.open_spiel_game(CPU_KER, 'liars_dice')
|
|
134
|
+
GPU_KER = nr.CUDAKer()
|
|
135
|
+
GAME = nr.to_efg(GPU_KER, GAME)
|
|
134
136
|
PARAMETERS = nr.CFR, True, False
|
|
135
|
-
|
|
136
|
-
|
|
137
|
+
|
|
138
|
+
|
|
137
139
|
def main():
|
|
138
140
|
R_type, alt, pred = PARAMETERS
|
|
139
|
-
R_row = R_type(
|
|
140
|
-
R_col = R_type(
|
|
141
|
-
x_bar, y_bar = nr.
|
|
142
|
-
GAME,
|
|
143
|
-
R_row,
|
|
144
|
-
R_col,
|
|
145
|
-
alternation=alt,
|
|
146
|
-
prediction=pred,
|
|
147
|
-
)
|
|
141
|
+
R_row = R_type(GPU_KER, GAME.row_sequence_form_polytope)
|
|
142
|
+
R_col = R_type(GPU_KER, GAME.column_sequence_form_polytope)
|
|
143
|
+
x_bar, y_bar = nr.rm(GAME, R_row, R_col, alternation=alt, prediction=pred)
|
|
148
144
|
data = {
|
|
149
|
-
'x_bar':
|
|
150
|
-
'y_bar':
|
|
145
|
+
'x_bar': GPU_KER.numpy.asnumpy(x_bar),
|
|
146
|
+
'y_bar': GPU_KER.numpy.asnumpy(y_bar),
|
|
151
147
|
'Exploitability': GAME.exploitability(x_bar, y_bar).item(),
|
|
152
148
|
'Expected utility': GAME.expected_row_utility(x_bar, y_bar).item(),
|
|
153
149
|
}
|
|
154
|
-
|
|
150
|
+
|
|
155
151
|
stdout.buffer.write(dumps(data, option=OPT_SERIALIZE_NUMPY))
|
|
156
|
-
|
|
157
|
-
|
|
152
|
+
|
|
153
|
+
|
|
158
154
|
if __name__ == '__main__':
|
|
159
155
|
main()
|
|
160
156
|
|
|
@@ -166,23 +162,23 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
166
162
|
.. code-block:: python
|
|
167
163
|
|
|
168
164
|
import noregret as nr
|
|
169
|
-
|
|
170
|
-
|
|
165
|
+
|
|
166
|
+
KER = nr.FPKer()
|
|
171
167
|
GAMES = {
|
|
172
|
-
'Rock paper superscissors': nr.RockPaperSuperscissors(
|
|
173
|
-
'Kuhn poker': nr.to_efg(
|
|
174
|
-
'Leduc poker': nr.to_efg(
|
|
168
|
+
'Rock paper superscissors': nr.RockPaperSuperscissors(KER),
|
|
169
|
+
'Kuhn poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'kuhn_poker')),
|
|
170
|
+
'Leduc poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'leduc_poker')),
|
|
175
171
|
}
|
|
176
|
-
|
|
177
|
-
|
|
172
|
+
|
|
173
|
+
|
|
178
174
|
def main():
|
|
179
175
|
for name, game in GAMES.items():
|
|
180
|
-
x, y = nr.
|
|
176
|
+
x, y = nr.lp(game)
|
|
181
177
|
v = game.expected_row_utility(x, y)
|
|
182
|
-
|
|
178
|
+
|
|
183
179
|
print(f'{name}:', v)
|
|
184
|
-
|
|
185
|
-
|
|
180
|
+
|
|
181
|
+
|
|
186
182
|
if __name__ == '__main__':
|
|
187
183
|
main()
|
|
188
184
|
|
|
@@ -236,11 +232,11 @@ If you use NoRegret in your research, please cite our library:
|
|
|
236
232
|
.. code-block:: bibtex
|
|
237
233
|
|
|
238
234
|
@misc{kim2026parallelizingcounterfactualregretminimization,
|
|
239
|
-
title={Parallelizing Counterfactual Regret Minimization},
|
|
235
|
+
title={Parallelizing Counterfactual Regret Minimization},
|
|
240
236
|
author={Juho Kim and Tuomas Sandholm},
|
|
241
237
|
year={2026},
|
|
242
238
|
eprint={2605.14277},
|
|
243
239
|
archivePrefix={arXiv},
|
|
244
240
|
primaryClass={cs.AI},
|
|
245
|
-
url={https://arxiv.org/abs/2605.14277},
|
|
241
|
+
url={https://arxiv.org/abs/2605.14277},
|
|
246
242
|
}
|
|
@@ -18,6 +18,7 @@ from noregret.games import (
|
|
|
18
18
|
RockPaperScissorsPlus,
|
|
19
19
|
RockPaperSuperscissors,
|
|
20
20
|
StagHunt,
|
|
21
|
+
StrategyProfile,
|
|
21
22
|
to_extensive_form_game,
|
|
22
23
|
TwoPlayerExtensiveFormGame,
|
|
23
24
|
TwoPlayerGame,
|
|
@@ -27,6 +28,7 @@ from noregret.games import (
|
|
|
27
28
|
TwoPlayerZeroSumGame,
|
|
28
29
|
TwoPlayerZeroSumMultilinearGame,
|
|
29
30
|
TwoPlayerZeroSumNormalFormGame,
|
|
31
|
+
UniformStrategyProfile,
|
|
30
32
|
)
|
|
31
33
|
from noregret.kernels import (
|
|
32
34
|
CUDAKernel,
|
|
@@ -46,6 +48,7 @@ from noregret.regret_minimizers import (
|
|
|
46
48
|
EuclideanRegularization,
|
|
47
49
|
FollowTheRegularizedLeader,
|
|
48
50
|
MirrorDescent,
|
|
51
|
+
MonteCarloCounterfactualRegretMinimization,
|
|
49
52
|
MultiplicativeWeightsUpdate,
|
|
50
53
|
OnlineGradientDescent,
|
|
51
54
|
ProbabilitySimplexRegretMinimizer,
|
|
@@ -54,15 +57,17 @@ from noregret.regret_minimizers import (
|
|
|
54
57
|
RegretMatchingPlus,
|
|
55
58
|
RegretMinimizer,
|
|
56
59
|
SequenceFormPolytopeRegretMinimizer,
|
|
60
|
+
StochasticRegretMinimizer,
|
|
57
61
|
SwapRegretMinimizer,
|
|
58
62
|
)
|
|
59
63
|
from noregret.sequence_form_polytopes import SequenceFormPolytope
|
|
60
64
|
from noregret.solvers import (
|
|
61
65
|
linear_programming,
|
|
62
66
|
regret_minimization,
|
|
67
|
+
stochastic_regret_minimization,
|
|
63
68
|
symmetric_regret_minimization,
|
|
64
69
|
)
|
|
65
|
-
from noregret.utilities import import_object, tuple_or_none
|
|
70
|
+
from noregret.utilities import import_object, sample, tuple_or_none
|
|
66
71
|
|
|
67
72
|
BM = BlumMansour
|
|
68
73
|
"""Alias for :class:`noregret.BlumMansour`."""
|
|
@@ -72,6 +77,8 @@ CFR2 = CounterfactualRegretMinimization2
|
|
|
72
77
|
"""Alias for :class:`noregret.CounterfactualRegretMinimization2`."""
|
|
73
78
|
CFR_plus = CounterfactualRegretMinimizationPlus
|
|
74
79
|
"""Alias for :class:`noregret.CounterfactualRegretMinimizationPlus`."""
|
|
80
|
+
CUDAKer = CUDAKernel
|
|
81
|
+
"""Alias for :class:`CUDAKernel`."""
|
|
75
82
|
DCFR = DiscountedCounterfactualRegretMinimization
|
|
76
83
|
"""Alias for
|
|
77
84
|
:class:`noregret.DiscountedCounterfactualRegretMinimization`.
|
|
@@ -86,10 +93,16 @@ EFG = ExtensiveFormGame
|
|
|
86
93
|
"""Alias for :class:`noregret.ExtensiveFormGame`."""
|
|
87
94
|
ER = EuclideanRegularization
|
|
88
95
|
"""Alias for :class:`noregret.EuclideanRegularization`."""
|
|
96
|
+
FPKer = FloatingPointKernel
|
|
97
|
+
"""Alias for :class:`FloatingPointKernel`."""
|
|
89
98
|
FTRL = FollowTheRegularizedLeader
|
|
90
99
|
"""Alias for :class:`noregret.FollowTheRegularizedLeader`."""
|
|
91
100
|
lp = linear_programming
|
|
92
101
|
"""Alias for :func:`noregret.linear_programming`."""
|
|
102
|
+
MCCFR = MonteCarloCounterfactualRegretMinimization
|
|
103
|
+
"""Alias for
|
|
104
|
+
:class:`noregret.MonteCarloCounterfactualRegretMinimization`.
|
|
105
|
+
"""
|
|
93
106
|
MD = MirrorDescent
|
|
94
107
|
"""Alias for :class:`noregret.MirrorDescent`."""
|
|
95
108
|
MWU = MultiplicativeWeightsUpdate
|
|
@@ -108,6 +121,8 @@ RM = RegretMatching
|
|
|
108
121
|
"""Alias for :class:`noregret.RegretMatching`."""
|
|
109
122
|
rm = regret_minimization
|
|
110
123
|
"""Alias for :func:`noregret.regret_minimization`."""
|
|
124
|
+
stochastic_rm = stochastic_regret_minimization
|
|
125
|
+
"""Alias for :func:`noregret.stochastic_regret_minimization`."""
|
|
111
126
|
symmetric_rm = symmetric_regret_minimization
|
|
112
127
|
"""Alias for :func:`noregret.symmetric_regret_minimization`."""
|
|
113
128
|
to_efg = to_extensive_form_game
|
|
@@ -125,6 +140,7 @@ __all__ = (
|
|
|
125
140
|
'CounterfactualRegretMinimization',
|
|
126
141
|
'CounterfactualRegretMinimization2',
|
|
127
142
|
'CounterfactualRegretMinimizationPlus',
|
|
143
|
+
'CUDAKer',
|
|
128
144
|
'CUDAKernel',
|
|
129
145
|
'DCFR',
|
|
130
146
|
'DiscountedCounterfactualRegretMinimization',
|
|
@@ -139,7 +155,7 @@ __all__ = (
|
|
|
139
155
|
'ExtensiveFormGame',
|
|
140
156
|
'FloatingPointKernel',
|
|
141
157
|
'FollowTheRegularizedLeader',
|
|
142
|
-
'
|
|
158
|
+
'FPKer',
|
|
143
159
|
'FTRL',
|
|
144
160
|
'Game',
|
|
145
161
|
'GiftExchangeGame',
|
|
@@ -150,8 +166,10 @@ __all__ = (
|
|
|
150
166
|
'lp',
|
|
151
167
|
'MatchingPennies',
|
|
152
168
|
'matrix_game',
|
|
169
|
+
'MCCFR',
|
|
153
170
|
'MD',
|
|
154
171
|
'MirrorDescent',
|
|
172
|
+
'MonteCarloCounterfactualRegretMinimization',
|
|
155
173
|
'MultilinearGame',
|
|
156
174
|
'MultiplicativeWeightsUpdate',
|
|
157
175
|
'MWU',
|
|
@@ -161,6 +179,7 @@ __all__ = (
|
|
|
161
179
|
'NormalFormGame',
|
|
162
180
|
'OGD',
|
|
163
181
|
'OnlineGradientDescent',
|
|
182
|
+
'open_spiel_game',
|
|
164
183
|
'PrisonersDilemma',
|
|
165
184
|
'ProbabilitySimplexRegretMinimizer',
|
|
166
185
|
'ProbabilitySimplexSwapRegretMinimizer',
|
|
@@ -175,10 +194,15 @@ __all__ = (
|
|
|
175
194
|
'RockPaperScissors',
|
|
176
195
|
'RockPaperScissorsPlus',
|
|
177
196
|
'RockPaperSuperscissors',
|
|
197
|
+
'sample',
|
|
178
198
|
'SequenceFormPolytope',
|
|
179
199
|
'SequenceFormPolytopeRegretMinimizer',
|
|
180
200
|
'Serializable',
|
|
181
201
|
'StagHunt',
|
|
202
|
+
'stochastic_regret_minimization',
|
|
203
|
+
'StochasticRegretMinimizer',
|
|
204
|
+
'stochastic_rm',
|
|
205
|
+
'StrategyProfile',
|
|
182
206
|
'SwapRegretMinimizer',
|
|
183
207
|
'symmetric_regret_minimization',
|
|
184
208
|
'symmetric_rm',
|
|
@@ -193,4 +217,5 @@ __all__ = (
|
|
|
193
217
|
'TwoPlayerZeroSumGame',
|
|
194
218
|
'TwoPlayerZeroSumMultilinearGame',
|
|
195
219
|
'TwoPlayerZeroSumNormalFormGame',
|
|
220
|
+
'UniformStrategyProfile',
|
|
196
221
|
)
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
"""Module for games."""
|
|
2
|
-
from noregret.games.black_box import
|
|
2
|
+
from noregret.games.black_box import (
|
|
3
|
+
BlackBoxGame,
|
|
4
|
+
open_spiel_game,
|
|
5
|
+
StrategyProfile,
|
|
6
|
+
UniformStrategyProfile,
|
|
7
|
+
)
|
|
3
8
|
from noregret.games.extensive_form import (
|
|
4
9
|
ExtensiveFormGame,
|
|
5
10
|
to_extensive_form_game,
|
|
@@ -49,6 +54,7 @@ __all__ = (
|
|
|
49
54
|
'RockPaperScissorsPlus',
|
|
50
55
|
'RockPaperSuperscissors',
|
|
51
56
|
'StagHunt',
|
|
57
|
+
'StrategyProfile',
|
|
52
58
|
'to_extensive_form_game',
|
|
53
59
|
'TwoPlayerExtensiveFormGame',
|
|
54
60
|
'TwoPlayerGame',
|
|
@@ -58,4 +64,5 @@ __all__ = (
|
|
|
58
64
|
'TwoPlayerZeroSumGame',
|
|
59
65
|
'TwoPlayerZeroSumMultilinearGame',
|
|
60
66
|
'TwoPlayerZeroSumNormalFormGame',
|
|
67
|
+
'UniformStrategyProfile',
|
|
61
68
|
)
|