noregret 0.0.0.dev4__tar.gz → 0.0.0.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/PKG-INFO +7 -58
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/README.rst +6 -57
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/__init__.py +6 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/__init__.py +3 -1
- noregret-0.0.0.dev6/noregret/games/black_box.py +198 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/extensive_form/games.py +2 -2
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/games.py +7 -4
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/multilinear.py +3 -7
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/games.py +5 -2
- noregret-0.0.0.dev6/noregret/games/utilities.py +141 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/kernels.py +0 -1
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/regret_minimizers/__init__.py +2 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/regret_minimizers/probability_simplices.py +2 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/regret_minimizers/regret_minimizers.py +7 -2
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/regret_minimizers/sequence_form_polytopes.py +101 -12
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/sequence_form_polytopes.py +5 -7
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/solvers/regret_minimization.py +4 -1
- noregret-0.0.0.dev6/noregret/tests/test_games.py +182 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/tests/test_linear_programming.py +2 -2
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/tests/test_regret_minimization.py +40 -3
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret.egg-info/PKG-INFO +7 -58
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret.egg-info/SOURCES.txt +1 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/setup.py +1 -1
- noregret-0.0.0.dev4/noregret/games/utilities.py +0 -141
- noregret-0.0.0.dev4/noregret/tests/test_games.py +0 -62
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/LICENSE +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/extensive_form/__init__.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/__init__.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/assurance-game.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/battle-of-the-sexes.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/chicken.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/gift-exchange-game.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/matching-pennies.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/prisoners-dilemma.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/pure-coordination.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/rock-paper-scissors-plus.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/rock-paper-scissors.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/rock-paper-superscissors.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/games/normal_form/stag-hunt.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/solvers/__init__.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/solvers/linear_programming.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/tests/__init__.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/tests/test_sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret/utilities.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret.egg-info/requires.txt +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev6}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: noregret
|
|
3
|
-
Version: 0.0.0.dev4
|
|
3
|
+
Version: 0.0.0.dev6
|
|
4
4
|
Summary: No-regret learning dynamics
|
|
5
5
|
Home-page: https://github.com/uoftcprg/noregret
|
|
6
6
|
Author: Universal, Open, Free, and Transparent Computer Poker Research Group
|
|
@@ -52,7 +52,7 @@ Dynamic: summary
|
|
|
52
52
|
NoRegret
|
|
53
53
|
========
|
|
54
54
|
|
|
55
|
-
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving
|
|
55
|
+
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving 95% code coverage.
|
|
56
56
|
|
|
57
57
|
Features
|
|
58
58
|
--------
|
|
@@ -94,8 +94,8 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
94
94
|
KERNEL = nr.FloatingPointKernel()
|
|
95
95
|
GAMES = {
|
|
96
96
|
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
97
|
-
'Kuhn poker': nr.
|
|
98
|
-
'Leduc poker': nr.
|
|
97
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
98
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
99
99
|
}
|
|
100
100
|
PARAMETERS = {
|
|
101
101
|
'CFR': (nr.CFR, False, False),
|
|
@@ -180,7 +180,7 @@ The code snippet below demonstrates how one can solve games while leveraging GPU
|
|
|
180
180
|
import noregret as nr
|
|
181
181
|
|
|
182
182
|
KERNEL = nr.CUDAKernel()
|
|
183
|
-
GAME = nr.
|
|
183
|
+
GAME = nr.to_efg(KERNEL, nr.from_open_spiel('liars_dice'))
|
|
184
184
|
PARAMETERS = nr.CFR, True, False
|
|
185
185
|
|
|
186
186
|
|
|
@@ -220,8 +220,8 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
220
220
|
KERNEL = nr.FloatingPointKernel()
|
|
221
221
|
GAMES = {
|
|
222
222
|
'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
|
|
223
|
-
'Kuhn poker': nr.
|
|
224
|
-
'Leduc poker': nr.
|
|
223
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
224
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
225
225
|
}
|
|
226
226
|
|
|
227
227
|
|
|
@@ -236,57 +236,6 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
236
236
|
if __name__ == '__main__':
|
|
237
237
|
main()
|
|
238
238
|
|
|
239
|
-
Conduct Research in Online Convex Optimization
|
|
240
|
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
241
|
-
|
|
242
|
-
The code snippet below reproduces Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
|
|
243
|
-
|
|
244
|
-
.. code-block:: python
|
|
245
|
-
|
|
246
|
-
from functools import partial
|
|
247
|
-
|
|
248
|
-
import matplotlib.pyplot as plt
|
|
249
|
-
import noregret as nr
|
|
250
|
-
|
|
251
|
-
KERNEL = nr.FloatingPointKernel()
|
|
252
|
-
GAME = nr.RockPaperScissorsPlus(KERNEL)
|
|
253
|
-
R_type = partial(nr.MWU, learning_rate=1e-3)
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def main():
|
|
257
|
-
RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
|
|
258
|
-
BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
|
|
259
|
-
|
|
260
|
-
nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
|
|
261
|
-
nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
|
|
262
|
-
x, _ = nr.linear_programming(GAME)
|
|
263
|
-
|
|
264
|
-
strategies = KERNEL.numpy.array(RM.strategies)
|
|
265
|
-
|
|
266
|
-
plt.clf()
|
|
267
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
268
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
269
|
-
plt.plot(*x[:2], 'ro')
|
|
270
|
-
plt.xlabel('Probability of action 1')
|
|
271
|
-
plt.ylabel('Probability of action 2')
|
|
272
|
-
plt.title('No-external regret dynamics')
|
|
273
|
-
plt.show()
|
|
274
|
-
|
|
275
|
-
strategies = KERNEL.numpy.array(BM_RM.strategies)
|
|
276
|
-
|
|
277
|
-
plt.clf()
|
|
278
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
279
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
280
|
-
plt.plot(*x[:2], 'ro')
|
|
281
|
-
plt.xlabel('Probability of action 1')
|
|
282
|
-
plt.ylabel('Probability of action 2')
|
|
283
|
-
plt.title('No-swap regret dynamics')
|
|
284
|
-
plt.show()
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
if __name__ == '__main__':
|
|
288
|
-
main()
|
|
289
|
-
|
|
290
239
|
Testing and Validation
|
|
291
240
|
----------------------
|
|
292
241
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
NoRegret
|
|
3
3
|
========
|
|
4
4
|
|
|
5
|
-
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving
|
|
5
|
+
NoRegret is an open-source software library for no-regret learning dynamics and computational game solving, developed by the Universal, Open, Free, and Transparent Computer Poker Research Group. NoRegret implements an extensive array of regret minimizers and game solvers, and also supports GPU-acceleration. The library can be used in a variety of use cases, from solving games to conducting research in online convex optimization. NoRegret's reliability has been established through extensive doctests and unit tests, achieving 95% code coverage.
|
|
6
6
|
|
|
7
7
|
Features
|
|
8
8
|
--------
|
|
@@ -44,8 +44,8 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
44
44
|
KERNEL = nr.FloatingPointKernel()
|
|
45
45
|
GAMES = {
|
|
46
46
|
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
47
|
-
'Kuhn poker': nr.
|
|
48
|
-
'Leduc poker': nr.
|
|
47
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
48
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
49
49
|
}
|
|
50
50
|
PARAMETERS = {
|
|
51
51
|
'CFR': (nr.CFR, False, False),
|
|
@@ -130,7 +130,7 @@ The code snippet below demonstrates how one can solve games while leveraging GPU
|
|
|
130
130
|
import noregret as nr
|
|
131
131
|
|
|
132
132
|
KERNEL = nr.CUDAKernel()
|
|
133
|
-
GAME = nr.
|
|
133
|
+
GAME = nr.to_efg(KERNEL, nr.from_open_spiel('liars_dice'))
|
|
134
134
|
PARAMETERS = nr.CFR, True, False
|
|
135
135
|
|
|
136
136
|
|
|
@@ -170,8 +170,8 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
170
170
|
KERNEL = nr.FloatingPointKernel()
|
|
171
171
|
GAMES = {
|
|
172
172
|
'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
|
|
173
|
-
'Kuhn poker': nr.
|
|
174
|
-
'Leduc poker': nr.
|
|
173
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
174
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
175
175
|
}
|
|
176
176
|
|
|
177
177
|
|
|
@@ -186,57 +186,6 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
186
186
|
if __name__ == '__main__':
|
|
187
187
|
main()
|
|
188
188
|
|
|
189
|
-
Conduct Research in Online Convex Optimization
|
|
190
|
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
191
|
-
|
|
192
|
-
The code snippet below reproduces Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
|
|
193
|
-
|
|
194
|
-
.. code-block:: python
|
|
195
|
-
|
|
196
|
-
from functools import partial
|
|
197
|
-
|
|
198
|
-
import matplotlib.pyplot as plt
|
|
199
|
-
import noregret as nr
|
|
200
|
-
|
|
201
|
-
KERNEL = nr.FloatingPointKernel()
|
|
202
|
-
GAME = nr.RockPaperScissorsPlus(KERNEL)
|
|
203
|
-
R_type = partial(nr.MWU, learning_rate=1e-3)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def main():
|
|
207
|
-
RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
|
|
208
|
-
BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
|
|
209
|
-
|
|
210
|
-
nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
|
|
211
|
-
nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
|
|
212
|
-
x, _ = nr.linear_programming(GAME)
|
|
213
|
-
|
|
214
|
-
strategies = KERNEL.numpy.array(RM.strategies)
|
|
215
|
-
|
|
216
|
-
plt.clf()
|
|
217
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
218
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
219
|
-
plt.plot(*x[:2], 'ro')
|
|
220
|
-
plt.xlabel('Probability of action 1')
|
|
221
|
-
plt.ylabel('Probability of action 2')
|
|
222
|
-
plt.title('No-external regret dynamics')
|
|
223
|
-
plt.show()
|
|
224
|
-
|
|
225
|
-
strategies = KERNEL.numpy.array(BM_RM.strategies)
|
|
226
|
-
|
|
227
|
-
plt.clf()
|
|
228
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
229
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
230
|
-
plt.plot(*x[:2], 'ro')
|
|
231
|
-
plt.xlabel('Probability of action 1')
|
|
232
|
-
plt.ylabel('Probability of action 2')
|
|
233
|
-
plt.title('No-swap regret dynamics')
|
|
234
|
-
plt.show()
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
if __name__ == '__main__':
|
|
238
|
-
main()
|
|
239
|
-
|
|
240
189
|
Testing and Validation
|
|
241
190
|
----------------------
|
|
242
191
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
from noregret.games import (
|
|
3
3
|
AssuranceGame,
|
|
4
4
|
BattleOfTheSexes,
|
|
5
|
+
BlackBoxGame,
|
|
5
6
|
Chicken,
|
|
6
7
|
ExtensiveFormGame,
|
|
7
8
|
from_open_spiel,
|
|
@@ -36,6 +37,7 @@ from noregret.kernels import (
|
|
|
36
37
|
from noregret.regret_minimizers import (
|
|
37
38
|
BlumMansour,
|
|
38
39
|
CounterfactualRegretMinimization,
|
|
40
|
+
CounterfactualRegretMinimization2,
|
|
39
41
|
CounterfactualRegretMinimizationPlus,
|
|
40
42
|
DiscountedCounterfactualRegretMinimization,
|
|
41
43
|
DiscountedRegretMatching,
|
|
@@ -65,6 +67,8 @@ BM = BlumMansour
|
|
|
65
67
|
"""Alias for :class:`noregret.BlumMansour`."""
|
|
66
68
|
CFR = CounterfactualRegretMinimization
|
|
67
69
|
"""Alias for :class:`noregret.CounterfactualRegretMinimization`."""
|
|
70
|
+
CFR2 = CounterfactualRegretMinimization2
|
|
71
|
+
"""Alias for :class:`noregret.CounterfactualRegretMinimization2`."""
|
|
68
72
|
CFR_plus = CounterfactualRegretMinimizationPlus
|
|
69
73
|
"""Alias for :class:`noregret.CounterfactualRegretMinimizationPlus`."""
|
|
70
74
|
DCFR = DiscountedCounterfactualRegretMinimization
|
|
@@ -111,12 +115,14 @@ to_efg = to_extensive_form
|
|
|
111
115
|
__all__ = (
|
|
112
116
|
'AssuranceGame',
|
|
113
117
|
'BattleOfTheSexes',
|
|
118
|
+
'BlackBoxGame',
|
|
114
119
|
'BlumMansour',
|
|
115
120
|
'BM',
|
|
116
121
|
'CFR',
|
|
117
122
|
'CFR_plus',
|
|
118
123
|
'Chicken',
|
|
119
124
|
'CounterfactualRegretMinimization',
|
|
125
|
+
'CounterfactualRegretMinimization2',
|
|
120
126
|
'CounterfactualRegretMinimizationPlus',
|
|
121
127
|
'CUDAKernel',
|
|
122
128
|
'DCFR',
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Module for games."""
|
|
2
|
+
from noregret.games.black_box import BlackBoxGame, from_open_spiel
|
|
2
3
|
from noregret.games.extensive_form import (
|
|
3
4
|
ExtensiveFormGame,
|
|
4
5
|
TwoPlayerExtensiveFormGame,
|
|
@@ -26,11 +27,12 @@ from noregret.games.normal_form import (
|
|
|
26
27
|
TwoPlayerNormalFormGame,
|
|
27
28
|
TwoPlayerZeroSumNormalFormGame,
|
|
28
29
|
)
|
|
29
|
-
from noregret.games.utilities import from_open_spiel, to_extensive_form
|
|
30
|
+
from noregret.games.utilities import to_extensive_form
|
|
30
31
|
|
|
31
32
|
__all__ = (
|
|
32
33
|
'AssuranceGame',
|
|
33
34
|
'BattleOfTheSexes',
|
|
35
|
+
'BlackBoxGame',
|
|
34
36
|
'Chicken',
|
|
35
37
|
'ExtensiveFormGame',
|
|
36
38
|
'from_open_spiel',
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""Module for black box games."""
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from functools import partial
|
|
5
|
+
|
|
6
|
+
from ordered_set import OrderedSet
|
|
7
|
+
from pyspiel import GameType, load_game
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class BlackBoxGame(ABC):
|
|
12
|
+
"""Abstract base class for black box games."""
|
|
13
|
+
|
|
14
|
+
@property
|
|
15
|
+
@abstractmethod
|
|
16
|
+
def player_count(self):
|
|
17
|
+
"""Return the number of players.
|
|
18
|
+
|
|
19
|
+
:return: Number of players.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def is_two_player(self):
|
|
24
|
+
"""Return whether the game is two-player.
|
|
25
|
+
|
|
26
|
+
:return: Whether the game is two-player.
|
|
27
|
+
"""
|
|
28
|
+
return self.player_count == 2
|
|
29
|
+
|
|
30
|
+
@property
|
|
31
|
+
@abstractmethod
|
|
32
|
+
def is_zero_sum(self):
|
|
33
|
+
"""Return whether the game is zero-sum.
|
|
34
|
+
|
|
35
|
+
:return: Whether the game is zero-sum.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
@property
|
|
39
|
+
@abstractmethod
|
|
40
|
+
def root_node(self):
|
|
41
|
+
"""Return the root node.
|
|
42
|
+
|
|
43
|
+
:return: Root node.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
@abstractmethod
|
|
47
|
+
def actions(self, node):
|
|
48
|
+
"""Return the actions given a node.
|
|
49
|
+
|
|
50
|
+
:param node: Node.
|
|
51
|
+
:return: Actions.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
@abstractmethod
|
|
55
|
+
def apply(self, node, action):
|
|
56
|
+
"""Return the child node given a node and an action.
|
|
57
|
+
|
|
58
|
+
:param node: Node.
|
|
59
|
+
:param action: Action.
|
|
60
|
+
:return: Child node.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
def children(self, node):
|
|
64
|
+
"""Return the children given a node.
|
|
65
|
+
|
|
66
|
+
:return: Children.
|
|
67
|
+
"""
|
|
68
|
+
return list(map(partial(self.apply, node), self.actions(node)))
|
|
69
|
+
|
|
70
|
+
def actions_and_children(self, node):
|
|
71
|
+
"""Return the actions and children given a node.
|
|
72
|
+
|
|
73
|
+
:return: Actions and children.
|
|
74
|
+
"""
|
|
75
|
+
A = self.actions(node)
|
|
76
|
+
|
|
77
|
+
return A, list(map(partial(self.apply, node), A))
|
|
78
|
+
|
|
79
|
+
@abstractmethod
|
|
80
|
+
def player(self, node):
|
|
81
|
+
"""Return the player given a node.
|
|
82
|
+
|
|
83
|
+
:param node: Node.
|
|
84
|
+
:return: Player.
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
@abstractmethod
|
|
88
|
+
def utility(self, node, player):
|
|
89
|
+
"""Return the utility given a player and a node.
|
|
90
|
+
|
|
91
|
+
:param node: Node.
|
|
92
|
+
:param player: Player.
|
|
93
|
+
:return: Utility.
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
def utilities(self, node):
|
|
97
|
+
"""Return the utilities given a node.
|
|
98
|
+
|
|
99
|
+
:param node: Node.
|
|
100
|
+
:return: Utilities.
|
|
101
|
+
"""
|
|
102
|
+
return list(map(partial(self.utility, node), range(self.player_count)))
|
|
103
|
+
|
|
104
|
+
@abstractmethod
|
|
105
|
+
def information_set(self, node):
|
|
106
|
+
"""Return the information set given a node.
|
|
107
|
+
|
|
108
|
+
:param node: Node.
|
|
109
|
+
:return: information set.
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
@abstractmethod
|
|
113
|
+
def chance_probability(self, node, action):
|
|
114
|
+
"""Return the chance probability given a node and an action.
|
|
115
|
+
|
|
116
|
+
:param node: Node.
|
|
117
|
+
:param action: Action.
|
|
118
|
+
:return: Chance probability.
|
|
119
|
+
"""
|
|
120
|
+
|
|
121
|
+
def chance_probabilities(self, node):
|
|
122
|
+
"""Return the chance probabilities given a node.
|
|
123
|
+
|
|
124
|
+
:param node: Node.
|
|
125
|
+
:return: Chance probabilities.
|
|
126
|
+
"""
|
|
127
|
+
A = self.actions(node)
|
|
128
|
+
|
|
129
|
+
return list(map(partial(self.chance_probability, node), A))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
|
|
133
|
+
class _OpenSpielBlackBoxGame(BlackBoxGame):
|
|
134
|
+
game: str
|
|
135
|
+
_game: str = field(init=False)
|
|
136
|
+
|
|
137
|
+
def __post_init__(self):
|
|
138
|
+
self._game = load_game(self.game)
|
|
139
|
+
|
|
140
|
+
@property
|
|
141
|
+
def player_count(self):
|
|
142
|
+
return self._game.num_players()
|
|
143
|
+
|
|
144
|
+
@property
|
|
145
|
+
def is_zero_sum(self):
|
|
146
|
+
return self._game.get_type().utility == GameType.Utility.ZERO_SUM
|
|
147
|
+
|
|
148
|
+
@property
|
|
149
|
+
def root_node(self):
|
|
150
|
+
return self._game.new_initial_state()
|
|
151
|
+
|
|
152
|
+
def actions(self, node):
|
|
153
|
+
return OrderedSet(map(node.action_to_string, node.legal_actions()))
|
|
154
|
+
|
|
155
|
+
def apply(self, node, action):
|
|
156
|
+
return node.child(node.string_to_action(action))
|
|
157
|
+
|
|
158
|
+
def children(self, node):
|
|
159
|
+
return list(node.child(a) for a in node.legal_actions())
|
|
160
|
+
|
|
161
|
+
def actions_and_children(self, node):
|
|
162
|
+
actions = []
|
|
163
|
+
children = []
|
|
164
|
+
|
|
165
|
+
for a in node.legal_actions():
|
|
166
|
+
actions.append(node.action_to_string(a))
|
|
167
|
+
children.append(node.child(a))
|
|
168
|
+
|
|
169
|
+
return OrderedSet(actions), children
|
|
170
|
+
|
|
171
|
+
def player(self, node):
|
|
172
|
+
i = node.current_player()
|
|
173
|
+
|
|
174
|
+
return None if i == -1 else i
|
|
175
|
+
|
|
176
|
+
def utility(self, node, player):
|
|
177
|
+
return node.player_reward(player)
|
|
178
|
+
|
|
179
|
+
def utilities(self, node):
|
|
180
|
+
return node.rewards()
|
|
181
|
+
|
|
182
|
+
def information_set(self, node):
|
|
183
|
+
return node.information_state_string()
|
|
184
|
+
|
|
185
|
+
def chance_probability(self, node, action):
|
|
186
|
+
return node.chance_outcomes()[self.actions(node).index(action)][1]
|
|
187
|
+
|
|
188
|
+
def chance_probabilities(self, node):
|
|
189
|
+
return [p for _, p in node.chance_outcomes()]
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def from_open_spiel(game):
|
|
193
|
+
"""Load a game from OpenSpiel.
|
|
194
|
+
|
|
195
|
+
:param game: Game in OpenSpiel.
|
|
196
|
+
:return: Game.
|
|
197
|
+
"""
|
|
198
|
+
return _OpenSpielBlackBoxGame(game)
|
|
@@ -122,12 +122,12 @@ class TwoPlayerExtensiveFormGame(TwoPlayerMultilinearGame, ExtensiveFormGame):
|
|
|
122
122
|
def row_best_response_value(self, column_strategy):
|
|
123
123
|
u = self.row_utility(column_strategy)
|
|
124
124
|
|
|
125
|
-
return self.
|
|
125
|
+
return self.row_sequence_form_polytope.best_response_value(u)
|
|
126
126
|
|
|
127
127
|
def column_best_response_value(self, row_strategy):
|
|
128
128
|
v = self.column_utility(row_strategy)
|
|
129
129
|
|
|
130
|
-
return self.
|
|
130
|
+
return self.column_sequence_form_polytope.best_response_value(v)
|
|
131
131
|
|
|
132
132
|
|
|
133
133
|
@dataclass
|
|
@@ -22,6 +22,7 @@ class Game(ABC):
|
|
|
22
22
|
:return: Number of players.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
+
@property
|
|
25
26
|
@abstractmethod
|
|
26
27
|
def is_symmetric(self):
|
|
27
28
|
"""Return whether the game is symmetric.
|
|
@@ -97,12 +98,14 @@ class Game(ABC):
|
|
|
97
98
|
:param strategy_profile: Strategy profile.
|
|
98
99
|
:return: Nash gap.
|
|
99
100
|
"""
|
|
100
|
-
expected_utilities = self.expected_utilities(strategy_profile)
|
|
101
|
-
best_response_values = self.best_response_values(strategy_profile)
|
|
101
|
+
expected_utilities = self.expected_utilities(*strategy_profile)
|
|
102
|
+
best_response_values = self.best_response_values(*strategy_profile)
|
|
103
|
+
nash_gap = 0
|
|
102
104
|
|
|
103
|
-
|
|
105
|
+
for u, u_prime in zip(best_response_values, expected_utilities):
|
|
106
|
+
assert u >= u_prime
|
|
104
107
|
|
|
105
|
-
|
|
108
|
+
nash_gap += u - u_prime
|
|
106
109
|
|
|
107
110
|
return nash_gap
|
|
108
111
|
|
|
@@ -39,6 +39,7 @@ class MultilinearGame(Game, ABC):
|
|
|
39
39
|
"""
|
|
40
40
|
return tuple(self.dimension(i) for i in range(self.player_count))
|
|
41
41
|
|
|
42
|
+
@property
|
|
42
43
|
def is_symmetric(self):
|
|
43
44
|
raise NotImplementedError
|
|
44
45
|
|
|
@@ -100,6 +101,7 @@ class TwoPlayerMultilinearGame(TwoPlayerGame, MultilinearGame, ABC):
|
|
|
100
101
|
"""
|
|
101
102
|
return self.payoffs[1]
|
|
102
103
|
|
|
104
|
+
@property
|
|
103
105
|
def is_symmetric(self):
|
|
104
106
|
np = self.kernel.numpy
|
|
105
107
|
|
|
@@ -120,12 +122,6 @@ class TwoPlayerMultilinearGame(TwoPlayerGame, MultilinearGame, ABC):
|
|
|
120
122
|
def expected_column_utility(self, row_strategy, column_strategy):
|
|
121
123
|
return row_strategy @ self.column_payoffs @ column_strategy
|
|
122
124
|
|
|
123
|
-
def expected_utility(self, player, row_strategy, column_strategy):
|
|
124
|
-
return row_strategy @ self.payoffs[player] @ column_strategy
|
|
125
|
-
|
|
126
|
-
def expected_utilities(self, row_strategy, column_strategy):
|
|
127
|
-
return row_strategy @ self.payoffs @ column_strategy
|
|
128
|
-
|
|
129
125
|
|
|
130
126
|
@dataclass
|
|
131
127
|
class TwoPlayerZeroSumMultilinearGame(
|
|
@@ -142,7 +138,7 @@ class TwoPlayerZeroSumMultilinearGame(
|
|
|
142
138
|
def __post_init__(self):
|
|
143
139
|
super(MultilinearGame, self).__post_init__()
|
|
144
140
|
|
|
145
|
-
if self.payoffs.shape !=
|
|
141
|
+
if self.payoffs.shape != self.dimensions:
|
|
146
142
|
raise ValueError('inconsistent dimensions')
|
|
147
143
|
|
|
148
144
|
@property
|
|
@@ -90,10 +90,13 @@ class TwoPlayerNormalFormGame(TwoPlayerMultilinearGame, NormalFormGame):
|
|
|
90
90
|
"""
|
|
91
91
|
return len(self.column_actions)
|
|
92
92
|
|
|
93
|
-
def
|
|
93
|
+
def expected_utilities(self, row_strategy, column_strategy):
|
|
94
|
+
return row_strategy @ self.payoffs @ column_strategy
|
|
95
|
+
|
|
96
|
+
def row_best_response_value(self, column_strategy):
|
|
94
97
|
return self.row_utility(column_strategy).max()
|
|
95
98
|
|
|
96
|
-
def column_best_response_value(self,
|
|
99
|
+
def column_best_response_value(self, row_strategy):
|
|
97
100
|
return self.column_utility(row_strategy).max()
|
|
98
101
|
|
|
99
102
|
|