noregret 0.0.0.dev6__tar.gz → 0.0.0.dev8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/PKG-INFO +52 -56
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/README.rst +51 -55
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/__init__.py +14 -6
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/games/__init__.py +6 -4
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/games/black_box.py +34 -9
- noregret-0.0.0.dev6/noregret/games/extensive_form/games.py → noregret-0.0.0.dev8/noregret/games/extensive_form.py +121 -1
- noregret-0.0.0.dev6/noregret/games/normal_form/games.py → noregret-0.0.0.dev8/noregret/games/normal_form.py +18 -2
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/kernels.py +14 -11
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/solvers/linear_programming.py +2 -4
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/solvers/regret_minimization.py +25 -15
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/tests/test_games.py +49 -29
- noregret-0.0.0.dev8/noregret/tests/test_linear_programming.py +36 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/tests/test_regret_minimization.py +66 -44
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/tests/test_sequence_form_polytopes.py +28 -28
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret.egg-info/PKG-INFO +52 -56
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret.egg-info/SOURCES.txt +13 -16
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/setup.py +1 -1
- noregret-0.0.0.dev6/noregret/games/extensive_form/__init__.py +0 -12
- noregret-0.0.0.dev6/noregret/games/normal_form/__init__.py +0 -34
- noregret-0.0.0.dev6/noregret/games/utilities.py +0 -141
- noregret-0.0.0.dev6/noregret/tests/test_linear_programming.py +0 -36
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/LICENSE +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/assurance-game.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/battle-of-the-sexes.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/chicken.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/gift-exchange-game.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/matching-pennies.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/prisoners-dilemma.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/pure-coordination.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/rock-paper-scissors-plus.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/rock-paper-scissors.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/rock-paper-superscissors.json +0 -0
- {noregret-0.0.0.dev6/noregret/games/normal_form → noregret-0.0.0.dev8/noregret/games/examples}/stag-hunt.json +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/games/games.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/games/multilinear.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/regret_minimizers/__init__.py +4 -4
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/regret_minimizers/probability_simplices.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/regret_minimizers/regret_minimizers.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/regret_minimizers/sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/solvers/__init__.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/tests/__init__.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret/utilities.py +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret.egg-info/requires.txt +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev6 → noregret-0.0.0.dev8}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: noregret
|
|
3
|
-
Version: 0.0.0.
|
|
3
|
+
Version: 0.0.0.dev8
|
|
4
4
|
Summary: No-regret learning dynamics
|
|
5
5
|
Home-page: https://github.com/uoftcprg/noregret
|
|
6
6
|
Author: Universal, Open, Free, and Transparent Computer Poker Research Group
|
|
@@ -52,7 +52,7 @@ Dynamic: summary
|
|
|
52
52
|
NoRegret
|
|
53
53
|
========
|
|
54
54
|
|
|
55
|
-
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving
|
|
55
|
+
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving 96% code coverage.
|
|
56
56
|
|
|
57
57
|
Features
|
|
58
58
|
--------
|
|
@@ -84,18 +84,18 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
84
84
|
|
|
85
85
|
from functools import partial
|
|
86
86
|
from math import inf
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
from tqdm import tqdm
|
|
89
89
|
import matplotlib.pyplot as plt
|
|
90
90
|
import noregret as nr
|
|
91
91
|
import pandas as pd
|
|
92
92
|
import seaborn as sns
|
|
93
|
-
|
|
94
|
-
|
|
93
|
+
|
|
94
|
+
KER = nr.FPKer()
|
|
95
95
|
GAMES = {
|
|
96
|
-
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(
|
|
97
|
-
'Kuhn poker': nr.to_efg(
|
|
98
|
-
'Leduc poker': nr.to_efg(
|
|
96
|
+
'Rock paper superscissors': nr.to_efg(KER, nr.RockPaperSuperscissors(KER)),
|
|
97
|
+
'Kuhn poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'kuhn_poker')),
|
|
98
|
+
'Leduc poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'leduc_poker')),
|
|
99
99
|
}
|
|
100
100
|
PARAMETERS = {
|
|
101
101
|
'CFR': (nr.CFR, False, False),
|
|
@@ -104,35 +104,35 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
104
104
|
'PCFR+': (partial(nr.CFR_plus, gamma=2), True, True),
|
|
105
105
|
'PCFR+*': (partial(nr.CFR_plus, gamma=inf), True, True),
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
|
|
108
|
+
|
|
109
109
|
def main():
|
|
110
110
|
for name, game in tqdm(GAMES.items()):
|
|
111
111
|
iterations = []
|
|
112
112
|
exploitabilities = []
|
|
113
113
|
expected_utilities = []
|
|
114
114
|
variants = []
|
|
115
|
-
|
|
115
|
+
|
|
116
116
|
for variant, (R_type, alt, pred) in tqdm(
|
|
117
117
|
PARAMETERS.items(),
|
|
118
118
|
leave=False,
|
|
119
119
|
):
|
|
120
|
-
R_row = R_type(
|
|
121
|
-
R_col = R_type(
|
|
122
|
-
|
|
120
|
+
R_row = R_type(KER, game.row_sequence_form_polytope)
|
|
121
|
+
R_col = R_type(KER, game.column_sequence_form_polytope)
|
|
122
|
+
|
|
123
123
|
def update():
|
|
124
124
|
t = R_row.iteration_count
|
|
125
125
|
x_bar = R_row.average_strategy
|
|
126
126
|
y_bar = R_col.average_strategy
|
|
127
127
|
epsilon = game.exploitability(x_bar, y_bar)
|
|
128
128
|
u = game.expected_row_utility(x_bar, y_bar)
|
|
129
|
-
|
|
129
|
+
|
|
130
130
|
iterations.append(t)
|
|
131
131
|
exploitabilities.append(epsilon)
|
|
132
132
|
expected_utilities.append(u)
|
|
133
133
|
variants.append(variant)
|
|
134
|
-
|
|
135
|
-
nr.
|
|
134
|
+
|
|
135
|
+
nr.rm(
|
|
136
136
|
game,
|
|
137
137
|
R_row,
|
|
138
138
|
R_col,
|
|
@@ -141,7 +141,7 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
141
141
|
update=update,
|
|
142
142
|
progress_bar={'leave': False},
|
|
143
143
|
)
|
|
144
|
-
|
|
144
|
+
|
|
145
145
|
data = {
|
|
146
146
|
'Iteration': iterations,
|
|
147
147
|
'Exploitability': exploitabilities,
|
|
@@ -149,21 +149,21 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
149
149
|
'Variant': variants,
|
|
150
150
|
}
|
|
151
151
|
df = pd.DataFrame(data)
|
|
152
|
-
|
|
152
|
+
|
|
153
153
|
plt.clf()
|
|
154
154
|
sns.lineplot(df, x='Iteration', y='Exploitability', hue='Variant')
|
|
155
155
|
plt.xscale('log')
|
|
156
156
|
plt.yscale('log')
|
|
157
157
|
plt.title(f'Exploitability in {name}')
|
|
158
158
|
plt.show()
|
|
159
|
-
|
|
159
|
+
|
|
160
160
|
plt.clf()
|
|
161
161
|
sns.lineplot(df, x='Iteration', y='Expected utility', hue='Variant')
|
|
162
162
|
plt.xscale('log')
|
|
163
163
|
plt.title(f'Expected utility in {name}')
|
|
164
164
|
plt.show()
|
|
165
|
-
|
|
166
|
-
|
|
165
|
+
|
|
166
|
+
|
|
167
167
|
if __name__ == '__main__':
|
|
168
168
|
main()
|
|
169
169
|
|
|
@@ -173,38 +173,34 @@ GPU-Accelerated Game Solving
|
|
|
173
173
|
The code snippet below demonstrates how one can solve games while leveraging GPU acceleration.
|
|
174
174
|
|
|
175
175
|
.. code-block:: python
|
|
176
|
-
|
|
176
|
+
|
|
177
177
|
from sys import stdout
|
|
178
|
-
|
|
178
|
+
|
|
179
179
|
from orjson import dumps, OPT_SERIALIZE_NUMPY
|
|
180
180
|
import noregret as nr
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
GAME = nr.
|
|
181
|
+
|
|
182
|
+
CPU_KER = nr.FPKer()
|
|
183
|
+
GAME = nr.open_spiel_game(CPU_KER, 'liars_dice')
|
|
184
|
+
GPU_KER = nr.CUDAKer()
|
|
185
|
+
GAME = nr.to_efg(GPU_KER, GAME)
|
|
184
186
|
PARAMETERS = nr.CFR, True, False
|
|
185
|
-
|
|
186
|
-
|
|
187
|
+
|
|
188
|
+
|
|
187
189
|
def main():
|
|
188
190
|
R_type, alt, pred = PARAMETERS
|
|
189
|
-
R_row = R_type(
|
|
190
|
-
R_col = R_type(
|
|
191
|
-
x_bar, y_bar = nr.
|
|
192
|
-
GAME,
|
|
193
|
-
R_row,
|
|
194
|
-
R_col,
|
|
195
|
-
alternation=alt,
|
|
196
|
-
prediction=pred,
|
|
197
|
-
)
|
|
191
|
+
R_row = R_type(GPU_KER, GAME.row_sequence_form_polytope)
|
|
192
|
+
R_col = R_type(GPU_KER, GAME.column_sequence_form_polytope)
|
|
193
|
+
x_bar, y_bar = nr.rm(GAME, R_row, R_col, alternation=alt, prediction=pred)
|
|
198
194
|
data = {
|
|
199
|
-
'x_bar':
|
|
200
|
-
'y_bar':
|
|
195
|
+
'x_bar': GPU_KER.numpy.asnumpy(x_bar),
|
|
196
|
+
'y_bar': GPU_KER.numpy.asnumpy(y_bar),
|
|
201
197
|
'Exploitability': GAME.exploitability(x_bar, y_bar).item(),
|
|
202
198
|
'Expected utility': GAME.expected_row_utility(x_bar, y_bar).item(),
|
|
203
199
|
}
|
|
204
|
-
|
|
200
|
+
|
|
205
201
|
stdout.buffer.write(dumps(data, option=OPT_SERIALIZE_NUMPY))
|
|
206
|
-
|
|
207
|
-
|
|
202
|
+
|
|
203
|
+
|
|
208
204
|
if __name__ == '__main__':
|
|
209
205
|
main()
|
|
210
206
|
|
|
@@ -216,23 +212,23 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
216
212
|
.. code-block:: python
|
|
217
213
|
|
|
218
214
|
import noregret as nr
|
|
219
|
-
|
|
220
|
-
|
|
215
|
+
|
|
216
|
+
KER = nr.FPKer()
|
|
221
217
|
GAMES = {
|
|
222
|
-
'Rock paper superscissors': nr.RockPaperSuperscissors(
|
|
223
|
-
'Kuhn poker': nr.to_efg(
|
|
224
|
-
'Leduc poker': nr.to_efg(
|
|
218
|
+
'Rock paper superscissors': nr.RockPaperSuperscissors(KER),
|
|
219
|
+
'Kuhn poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'kuhn_poker')),
|
|
220
|
+
'Leduc poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'leduc_poker')),
|
|
225
221
|
}
|
|
226
|
-
|
|
227
|
-
|
|
222
|
+
|
|
223
|
+
|
|
228
224
|
def main():
|
|
229
225
|
for name, game in GAMES.items():
|
|
230
|
-
x, y = nr.
|
|
226
|
+
x, y = nr.lp(game)
|
|
231
227
|
v = game.expected_row_utility(x, y)
|
|
232
|
-
|
|
228
|
+
|
|
233
229
|
print(f'{name}:', v)
|
|
234
|
-
|
|
235
|
-
|
|
230
|
+
|
|
231
|
+
|
|
236
232
|
if __name__ == '__main__':
|
|
237
233
|
main()
|
|
238
234
|
|
|
@@ -286,11 +282,11 @@ If you use NoRegret in your research, please cite our library:
|
|
|
286
282
|
.. code-block:: bibtex
|
|
287
283
|
|
|
288
284
|
@misc{kim2026parallelizingcounterfactualregretminimization,
|
|
289
|
-
title={Parallelizing Counterfactual Regret Minimization},
|
|
285
|
+
title={Parallelizing Counterfactual Regret Minimization},
|
|
290
286
|
author={Juho Kim and Tuomas Sandholm},
|
|
291
287
|
year={2026},
|
|
292
288
|
eprint={2605.14277},
|
|
293
289
|
archivePrefix={arXiv},
|
|
294
290
|
primaryClass={cs.AI},
|
|
295
|
-
url={https://arxiv.org/abs/2605.14277},
|
|
291
|
+
url={https://arxiv.org/abs/2605.14277},
|
|
296
292
|
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
NoRegret
|
|
3
3
|
========
|
|
4
4
|
|
|
5
|
-
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving
|
|
5
|
+
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving 96% code coverage.
|
|
6
6
|
|
|
7
7
|
Features
|
|
8
8
|
--------
|
|
@@ -34,18 +34,18 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
34
34
|
|
|
35
35
|
from functools import partial
|
|
36
36
|
from math import inf
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
from tqdm import tqdm
|
|
39
39
|
import matplotlib.pyplot as plt
|
|
40
40
|
import noregret as nr
|
|
41
41
|
import pandas as pd
|
|
42
42
|
import seaborn as sns
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
|
|
44
|
+
KER = nr.FPKer()
|
|
45
45
|
GAMES = {
|
|
46
|
-
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(
|
|
47
|
-
'Kuhn poker': nr.to_efg(
|
|
48
|
-
'Leduc poker': nr.to_efg(
|
|
46
|
+
'Rock paper superscissors': nr.to_efg(KER, nr.RockPaperSuperscissors(KER)),
|
|
47
|
+
'Kuhn poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'kuhn_poker')),
|
|
48
|
+
'Leduc poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'leduc_poker')),
|
|
49
49
|
}
|
|
50
50
|
PARAMETERS = {
|
|
51
51
|
'CFR': (nr.CFR, False, False),
|
|
@@ -54,35 +54,35 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
54
54
|
'PCFR+': (partial(nr.CFR_plus, gamma=2), True, True),
|
|
55
55
|
'PCFR+*': (partial(nr.CFR_plus, gamma=inf), True, True),
|
|
56
56
|
}
|
|
57
|
-
|
|
58
|
-
|
|
57
|
+
|
|
58
|
+
|
|
59
59
|
def main():
|
|
60
60
|
for name, game in tqdm(GAMES.items()):
|
|
61
61
|
iterations = []
|
|
62
62
|
exploitabilities = []
|
|
63
63
|
expected_utilities = []
|
|
64
64
|
variants = []
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
for variant, (R_type, alt, pred) in tqdm(
|
|
67
67
|
PARAMETERS.items(),
|
|
68
68
|
leave=False,
|
|
69
69
|
):
|
|
70
|
-
R_row = R_type(
|
|
71
|
-
R_col = R_type(
|
|
72
|
-
|
|
70
|
+
R_row = R_type(KER, game.row_sequence_form_polytope)
|
|
71
|
+
R_col = R_type(KER, game.column_sequence_form_polytope)
|
|
72
|
+
|
|
73
73
|
def update():
|
|
74
74
|
t = R_row.iteration_count
|
|
75
75
|
x_bar = R_row.average_strategy
|
|
76
76
|
y_bar = R_col.average_strategy
|
|
77
77
|
epsilon = game.exploitability(x_bar, y_bar)
|
|
78
78
|
u = game.expected_row_utility(x_bar, y_bar)
|
|
79
|
-
|
|
79
|
+
|
|
80
80
|
iterations.append(t)
|
|
81
81
|
exploitabilities.append(epsilon)
|
|
82
82
|
expected_utilities.append(u)
|
|
83
83
|
variants.append(variant)
|
|
84
|
-
|
|
85
|
-
nr.
|
|
84
|
+
|
|
85
|
+
nr.rm(
|
|
86
86
|
game,
|
|
87
87
|
R_row,
|
|
88
88
|
R_col,
|
|
@@ -91,7 +91,7 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
91
91
|
update=update,
|
|
92
92
|
progress_bar={'leave': False},
|
|
93
93
|
)
|
|
94
|
-
|
|
94
|
+
|
|
95
95
|
data = {
|
|
96
96
|
'Iteration': iterations,
|
|
97
97
|
'Exploitability': exploitabilities,
|
|
@@ -99,21 +99,21 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
99
99
|
'Variant': variants,
|
|
100
100
|
}
|
|
101
101
|
df = pd.DataFrame(data)
|
|
102
|
-
|
|
102
|
+
|
|
103
103
|
plt.clf()
|
|
104
104
|
sns.lineplot(df, x='Iteration', y='Exploitability', hue='Variant')
|
|
105
105
|
plt.xscale('log')
|
|
106
106
|
plt.yscale('log')
|
|
107
107
|
plt.title(f'Exploitability in {name}')
|
|
108
108
|
plt.show()
|
|
109
|
-
|
|
109
|
+
|
|
110
110
|
plt.clf()
|
|
111
111
|
sns.lineplot(df, x='Iteration', y='Expected utility', hue='Variant')
|
|
112
112
|
plt.xscale('log')
|
|
113
113
|
plt.title(f'Expected utility in {name}')
|
|
114
114
|
plt.show()
|
|
115
|
-
|
|
116
|
-
|
|
115
|
+
|
|
116
|
+
|
|
117
117
|
if __name__ == '__main__':
|
|
118
118
|
main()
|
|
119
119
|
|
|
@@ -123,38 +123,34 @@ GPU-Accelerated Game Solving
|
|
|
123
123
|
The code snippet below demonstrates how one can solve games while leveraging GPU acceleration.
|
|
124
124
|
|
|
125
125
|
.. code-block:: python
|
|
126
|
-
|
|
126
|
+
|
|
127
127
|
from sys import stdout
|
|
128
|
-
|
|
128
|
+
|
|
129
129
|
from orjson import dumps, OPT_SERIALIZE_NUMPY
|
|
130
130
|
import noregret as nr
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
GAME = nr.
|
|
131
|
+
|
|
132
|
+
CPU_KER = nr.FPKer()
|
|
133
|
+
GAME = nr.open_spiel_game(CPU_KER, 'liars_dice')
|
|
134
|
+
GPU_KER = nr.CUDAKer()
|
|
135
|
+
GAME = nr.to_efg(GPU_KER, GAME)
|
|
134
136
|
PARAMETERS = nr.CFR, True, False
|
|
135
|
-
|
|
136
|
-
|
|
137
|
+
|
|
138
|
+
|
|
137
139
|
def main():
|
|
138
140
|
R_type, alt, pred = PARAMETERS
|
|
139
|
-
R_row = R_type(
|
|
140
|
-
R_col = R_type(
|
|
141
|
-
x_bar, y_bar = nr.
|
|
142
|
-
GAME,
|
|
143
|
-
R_row,
|
|
144
|
-
R_col,
|
|
145
|
-
alternation=alt,
|
|
146
|
-
prediction=pred,
|
|
147
|
-
)
|
|
141
|
+
R_row = R_type(GPU_KER, GAME.row_sequence_form_polytope)
|
|
142
|
+
R_col = R_type(GPU_KER, GAME.column_sequence_form_polytope)
|
|
143
|
+
x_bar, y_bar = nr.rm(GAME, R_row, R_col, alternation=alt, prediction=pred)
|
|
148
144
|
data = {
|
|
149
|
-
'x_bar':
|
|
150
|
-
'y_bar':
|
|
145
|
+
'x_bar': GPU_KER.numpy.asnumpy(x_bar),
|
|
146
|
+
'y_bar': GPU_KER.numpy.asnumpy(y_bar),
|
|
151
147
|
'Exploitability': GAME.exploitability(x_bar, y_bar).item(),
|
|
152
148
|
'Expected utility': GAME.expected_row_utility(x_bar, y_bar).item(),
|
|
153
149
|
}
|
|
154
|
-
|
|
150
|
+
|
|
155
151
|
stdout.buffer.write(dumps(data, option=OPT_SERIALIZE_NUMPY))
|
|
156
|
-
|
|
157
|
-
|
|
152
|
+
|
|
153
|
+
|
|
158
154
|
if __name__ == '__main__':
|
|
159
155
|
main()
|
|
160
156
|
|
|
@@ -166,23 +162,23 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
166
162
|
.. code-block:: python
|
|
167
163
|
|
|
168
164
|
import noregret as nr
|
|
169
|
-
|
|
170
|
-
|
|
165
|
+
|
|
166
|
+
KER = nr.FPKer()
|
|
171
167
|
GAMES = {
|
|
172
|
-
'Rock paper superscissors': nr.RockPaperSuperscissors(
|
|
173
|
-
'Kuhn poker': nr.to_efg(
|
|
174
|
-
'Leduc poker': nr.to_efg(
|
|
168
|
+
'Rock paper superscissors': nr.RockPaperSuperscissors(KER),
|
|
169
|
+
'Kuhn poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'kuhn_poker')),
|
|
170
|
+
'Leduc poker': nr.to_efg(KER, nr.open_spiel_game(KER, 'leduc_poker')),
|
|
175
171
|
}
|
|
176
|
-
|
|
177
|
-
|
|
172
|
+
|
|
173
|
+
|
|
178
174
|
def main():
|
|
179
175
|
for name, game in GAMES.items():
|
|
180
|
-
x, y = nr.
|
|
176
|
+
x, y = nr.lp(game)
|
|
181
177
|
v = game.expected_row_utility(x, y)
|
|
182
|
-
|
|
178
|
+
|
|
183
179
|
print(f'{name}:', v)
|
|
184
|
-
|
|
185
|
-
|
|
180
|
+
|
|
181
|
+
|
|
186
182
|
if __name__ == '__main__':
|
|
187
183
|
main()
|
|
188
184
|
|
|
@@ -236,11 +232,11 @@ If you use NoRegret in your research, please cite our library:
|
|
|
236
232
|
.. code-block:: bibtex
|
|
237
233
|
|
|
238
234
|
@misc{kim2026parallelizingcounterfactualregretminimization,
|
|
239
|
-
title={Parallelizing Counterfactual Regret Minimization},
|
|
235
|
+
title={Parallelizing Counterfactual Regret Minimization},
|
|
240
236
|
author={Juho Kim and Tuomas Sandholm},
|
|
241
237
|
year={2026},
|
|
242
238
|
eprint={2605.14277},
|
|
243
239
|
archivePrefix={arXiv},
|
|
244
240
|
primaryClass={cs.AI},
|
|
245
|
-
url={https://arxiv.org/abs/2605.14277},
|
|
241
|
+
url={https://arxiv.org/abs/2605.14277},
|
|
246
242
|
}
|
|
@@ -5,19 +5,20 @@ from noregret.games import (
|
|
|
5
5
|
BlackBoxGame,
|
|
6
6
|
Chicken,
|
|
7
7
|
ExtensiveFormGame,
|
|
8
|
-
from_open_spiel,
|
|
9
8
|
Game,
|
|
10
9
|
GiftExchangeGame,
|
|
11
10
|
MatchingPennies,
|
|
11
|
+
matrix_game,
|
|
12
12
|
MultilinearGame,
|
|
13
13
|
NormalFormGame,
|
|
14
|
+
open_spiel_game,
|
|
14
15
|
PrisonersDilemma,
|
|
15
16
|
PureCoordination,
|
|
16
17
|
RockPaperScissors,
|
|
17
18
|
RockPaperScissorsPlus,
|
|
18
19
|
RockPaperSuperscissors,
|
|
19
20
|
StagHunt,
|
|
20
|
-
|
|
21
|
+
to_extensive_form_game,
|
|
21
22
|
TwoPlayerExtensiveFormGame,
|
|
22
23
|
TwoPlayerGame,
|
|
23
24
|
TwoPlayerMultilinearGame,
|
|
@@ -71,6 +72,8 @@ CFR2 = CounterfactualRegretMinimization2
|
|
|
71
72
|
"""Alias for :class:`noregret.CounterfactualRegretMinimization2`."""
|
|
72
73
|
CFR_plus = CounterfactualRegretMinimizationPlus
|
|
73
74
|
"""Alias for :class:`noregret.CounterfactualRegretMinimizationPlus`."""
|
|
75
|
+
CUDAKer = CUDAKernel
|
|
76
|
+
"""Alias for :class:`noregret.CUDAKernel`."""
|
|
74
77
|
DCFR = DiscountedCounterfactualRegretMinimization
|
|
75
78
|
"""Alias for
|
|
76
79
|
:class:`noregret.DiscountedCounterfactualRegretMinimization`.
|
|
@@ -85,6 +88,8 @@ EFG = ExtensiveFormGame
|
|
|
85
88
|
"""Alias for :class:`noregret.ExtensiveFormGame`."""
|
|
86
89
|
ER = EuclideanRegularization
|
|
87
90
|
"""Alias for :class:`noregret.EuclideanRegularization`."""
|
|
91
|
+
FPKer = FloatingPointKernel
|
|
92
|
+
"""Alias for :class:`noregret.FloatingPointKernel`."""
|
|
88
93
|
FTRL = FollowTheRegularizedLeader
|
|
89
94
|
"""Alias for :class:`noregret.FollowTheRegularizedLeader`."""
|
|
90
95
|
lp = linear_programming
|
|
@@ -109,8 +114,8 @@ rm = regret_minimization
|
|
|
109
114
|
"""Alias for :func:`noregret.regret_minimization`."""
|
|
110
115
|
symmetric_rm = symmetric_regret_minimization
|
|
111
116
|
"""Alias for :func:`noregret.symmetric_regret_minimization`."""
|
|
112
|
-
to_efg =
|
|
113
|
-
"""Alias for :func:`noregret.
|
|
117
|
+
to_efg = to_extensive_form_game
|
|
118
|
+
"""Alias for :func:`noregret.to_extensive_form_game`."""
|
|
114
119
|
|
|
115
120
|
__all__ = (
|
|
116
121
|
'AssuranceGame',
|
|
@@ -124,6 +129,7 @@ __all__ = (
|
|
|
124
129
|
'CounterfactualRegretMinimization',
|
|
125
130
|
'CounterfactualRegretMinimization2',
|
|
126
131
|
'CounterfactualRegretMinimizationPlus',
|
|
132
|
+
'CUDAKer',
|
|
127
133
|
'CUDAKernel',
|
|
128
134
|
'DCFR',
|
|
129
135
|
'DiscountedCounterfactualRegretMinimization',
|
|
@@ -138,7 +144,7 @@ __all__ = (
|
|
|
138
144
|
'ExtensiveFormGame',
|
|
139
145
|
'FloatingPointKernel',
|
|
140
146
|
'FollowTheRegularizedLeader',
|
|
141
|
-
'
|
|
147
|
+
'FPKer',
|
|
142
148
|
'FTRL',
|
|
143
149
|
'Game',
|
|
144
150
|
'GiftExchangeGame',
|
|
@@ -148,6 +154,7 @@ __all__ = (
|
|
|
148
154
|
'linear_programming',
|
|
149
155
|
'lp',
|
|
150
156
|
'MatchingPennies',
|
|
157
|
+
'matrix_game',
|
|
151
158
|
'MD',
|
|
152
159
|
'MirrorDescent',
|
|
153
160
|
'MultilinearGame',
|
|
@@ -159,6 +166,7 @@ __all__ = (
|
|
|
159
166
|
'NormalFormGame',
|
|
160
167
|
'OGD',
|
|
161
168
|
'OnlineGradientDescent',
|
|
169
|
+
'open_spiel_game',
|
|
162
170
|
'PrisonersDilemma',
|
|
163
171
|
'ProbabilitySimplexRegretMinimizer',
|
|
164
172
|
'ProbabilitySimplexSwapRegretMinimizer',
|
|
@@ -181,7 +189,7 @@ __all__ = (
|
|
|
181
189
|
'symmetric_regret_minimization',
|
|
182
190
|
'symmetric_rm',
|
|
183
191
|
'to_efg',
|
|
184
|
-
'
|
|
192
|
+
'to_extensive_form_game',
|
|
185
193
|
'tuple_or_none',
|
|
186
194
|
'TwoPlayerExtensiveFormGame',
|
|
187
195
|
'TwoPlayerGame',
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""Module for games."""
|
|
2
|
-
from noregret.games.black_box import BlackBoxGame,
|
|
2
|
+
from noregret.games.black_box import BlackBoxGame, open_spiel_game
|
|
3
3
|
from noregret.games.extensive_form import (
|
|
4
4
|
ExtensiveFormGame,
|
|
5
|
+
to_extensive_form_game,
|
|
5
6
|
TwoPlayerExtensiveFormGame,
|
|
6
7
|
TwoPlayerZeroSumExtensiveFormGame,
|
|
7
8
|
)
|
|
@@ -17,6 +18,7 @@ from noregret.games.normal_form import (
|
|
|
17
18
|
Chicken,
|
|
18
19
|
GiftExchangeGame,
|
|
19
20
|
MatchingPennies,
|
|
21
|
+
matrix_game,
|
|
20
22
|
NormalFormGame,
|
|
21
23
|
PrisonersDilemma,
|
|
22
24
|
PureCoordination,
|
|
@@ -27,7 +29,6 @@ from noregret.games.normal_form import (
|
|
|
27
29
|
TwoPlayerNormalFormGame,
|
|
28
30
|
TwoPlayerZeroSumNormalFormGame,
|
|
29
31
|
)
|
|
30
|
-
from noregret.games.utilities import to_extensive_form
|
|
31
32
|
|
|
32
33
|
__all__ = (
|
|
33
34
|
'AssuranceGame',
|
|
@@ -35,19 +36,20 @@ __all__ = (
|
|
|
35
36
|
'BlackBoxGame',
|
|
36
37
|
'Chicken',
|
|
37
38
|
'ExtensiveFormGame',
|
|
38
|
-
'from_open_spiel',
|
|
39
39
|
'Game',
|
|
40
40
|
'GiftExchangeGame',
|
|
41
41
|
'MatchingPennies',
|
|
42
|
+
'matrix_game',
|
|
42
43
|
'MultilinearGame',
|
|
43
44
|
'NormalFormGame',
|
|
45
|
+
'open_spiel_game',
|
|
44
46
|
'PrisonersDilemma',
|
|
45
47
|
'PureCoordination',
|
|
46
48
|
'RockPaperScissors',
|
|
47
49
|
'RockPaperScissorsPlus',
|
|
48
50
|
'RockPaperSuperscissors',
|
|
49
51
|
'StagHunt',
|
|
50
|
-
'
|
|
52
|
+
'to_extensive_form_game',
|
|
51
53
|
'TwoPlayerExtensiveFormGame',
|
|
52
54
|
'TwoPlayerGame',
|
|
53
55
|
'TwoPlayerMultilinearGame',
|
|
@@ -6,10 +6,14 @@ from functools import partial
|
|
|
6
6
|
from ordered_set import OrderedSet
|
|
7
7
|
from pyspiel import GameType, load_game
|
|
8
8
|
|
|
9
|
+
from noregret.kernels import Kernel
|
|
10
|
+
|
|
9
11
|
|
|
10
12
|
@dataclass
|
|
11
13
|
class BlackBoxGame(ABC):
|
|
12
14
|
"""Abstract base class for black box games."""
|
|
15
|
+
kernel: Kernel
|
|
16
|
+
"""Kernel."""
|
|
13
17
|
|
|
14
18
|
@property
|
|
15
19
|
@abstractmethod
|
|
@@ -99,7 +103,11 @@ class BlackBoxGame(ABC):
|
|
|
99
103
|
:param node: Node.
|
|
100
104
|
:return: Utilities.
|
|
101
105
|
"""
|
|
102
|
-
|
|
106
|
+
np = self.kernel.numpy
|
|
107
|
+
dtype = self.kernel.data_type
|
|
108
|
+
us = list(map(partial(self.utility, node), range(self.player_count)))
|
|
109
|
+
|
|
110
|
+
return np.array(us, dtype)
|
|
103
111
|
|
|
104
112
|
@abstractmethod
|
|
105
113
|
def information_set(self, node):
|
|
@@ -124,9 +132,12 @@ class BlackBoxGame(ABC):
|
|
|
124
132
|
:param node: Node.
|
|
125
133
|
:return: Chance probabilities.
|
|
126
134
|
"""
|
|
135
|
+
np = self.kernel.numpy
|
|
136
|
+
dtype = self.kernel.data_type
|
|
127
137
|
A = self.actions(node)
|
|
138
|
+
ps = list(map(partial(self.chance_probability, node), A))
|
|
128
139
|
|
|
129
|
-
return
|
|
140
|
+
return np.array(ps, dtype)
|
|
130
141
|
|
|
131
142
|
|
|
132
143
|
@dataclass
|
|
@@ -171,28 +182,42 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
|
|
|
171
182
|
def player(self, node):
|
|
172
183
|
i = node.current_player()
|
|
173
184
|
|
|
174
|
-
return None if i
|
|
185
|
+
return None if i < 0 else i
|
|
175
186
|
|
|
176
187
|
def utility(self, node, player):
|
|
177
|
-
|
|
188
|
+
np = self.kernel.numpy
|
|
189
|
+
dtype = self.kernel.data_type
|
|
190
|
+
|
|
191
|
+
return np.array(node.player_reward(player), dtype)
|
|
178
192
|
|
|
179
193
|
def utilities(self, node):
|
|
180
|
-
|
|
194
|
+
np = self.kernel.numpy
|
|
195
|
+
dtype = self.kernel.data_type
|
|
196
|
+
|
|
197
|
+
return np.array(node.rewards(), dtype)
|
|
181
198
|
|
|
182
199
|
def information_set(self, node):
|
|
183
200
|
return node.information_state_string()
|
|
184
201
|
|
|
185
202
|
def chance_probability(self, node, action):
|
|
186
|
-
|
|
203
|
+
np = self.kernel.numpy
|
|
204
|
+
dtype = self.kernel.data_type
|
|
205
|
+
p = node.chance_outcomes()[self.actions(node).index(action)][1]
|
|
206
|
+
|
|
207
|
+
return np.array(p, dtype)
|
|
187
208
|
|
|
188
209
|
def chance_probabilities(self, node):
|
|
189
|
-
|
|
210
|
+
np = self.kernel.numpy
|
|
211
|
+
dtype = self.kernel.data_type
|
|
212
|
+
|
|
213
|
+
return np.array([p for _, p in node.chance_outcomes()], dtype)
|
|
190
214
|
|
|
191
215
|
|
|
192
|
-
def
|
|
216
|
+
def open_spiel_game(kernel, game):
|
|
193
217
|
"""Load a game from OpenSpiel.
|
|
194
218
|
|
|
219
|
+
:param kernel: Kernel.
|
|
195
220
|
:param game: Game in OpenSpiel.
|
|
196
221
|
:return: Game.
|
|
197
222
|
"""
|
|
198
|
-
return _OpenSpielBlackBoxGame(game)
|
|
223
|
+
return _OpenSpielBlackBoxGame(kernel, game)
|