noregret 0.0.0.dev4__tar.gz → 0.0.0.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/PKG-INFO +6 -57
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/README.rst +5 -56
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/__init__.py +6 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/__init__.py +3 -1
- noregret-0.0.0.dev5/noregret/games/black_box.py +200 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/games.py +1 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/multilinear.py +2 -0
- noregret-0.0.0.dev5/noregret/games/utilities.py +140 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/kernels.py +0 -1
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/__init__.py +2 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/probability_simplices.py +2 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/regret_minimizers.py +7 -2
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/sequence_form_polytopes.py +101 -12
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/sequence_form_polytopes.py +3 -3
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/solvers/regret_minimization.py +1 -1
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/test_games.py +2 -2
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/test_linear_programming.py +2 -2
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/test_regret_minimization.py +40 -3
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/PKG-INFO +6 -57
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/SOURCES.txt +1 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/setup.py +1 -1
- noregret-0.0.0.dev4/noregret/games/utilities.py +0 -141
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/LICENSE +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/extensive_form/__init__.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/extensive_form/games.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/__init__.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/assurance-game.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/battle-of-the-sexes.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/chicken.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/games.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/gift-exchange-game.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/matching-pennies.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/prisoners-dilemma.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/pure-coordination.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-scissors-plus.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-scissors.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-superscissors.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/stag-hunt.json +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/solvers/__init__.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/solvers/linear_programming.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/__init__.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/test_sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/utilities.py +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/requires.txt +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: noregret
|
|
3
|
-
Version: 0.0.0.
|
|
3
|
+
Version: 0.0.0.dev5
|
|
4
4
|
Summary: No-regret learning dynamics
|
|
5
5
|
Home-page: https://github.com/uoftcprg/noregret
|
|
6
6
|
Author: Universal, Open, Free, and Transparent Computer Poker Research Group
|
|
@@ -94,8 +94,8 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
94
94
|
KERNEL = nr.FloatingPointKernel()
|
|
95
95
|
GAMES = {
|
|
96
96
|
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
97
|
-
'Kuhn poker': nr.
|
|
98
|
-
'Leduc poker': nr.
|
|
97
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
98
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
99
99
|
}
|
|
100
100
|
PARAMETERS = {
|
|
101
101
|
'CFR': (nr.CFR, False, False),
|
|
@@ -180,7 +180,7 @@ The code snippet below demonstrates how one can solve games while leveraging GPU
|
|
|
180
180
|
import noregret as nr
|
|
181
181
|
|
|
182
182
|
KERNEL = nr.CUDAKernel()
|
|
183
|
-
GAME = nr.
|
|
183
|
+
GAME = nr.to_efg(KERNEL, nr.from_open_spiel('liars_dice'))
|
|
184
184
|
PARAMETERS = nr.CFR, True, False
|
|
185
185
|
|
|
186
186
|
|
|
@@ -220,8 +220,8 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
220
220
|
KERNEL = nr.FloatingPointKernel()
|
|
221
221
|
GAMES = {
|
|
222
222
|
'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
|
|
223
|
-
'Kuhn poker': nr.
|
|
224
|
-
'Leduc poker': nr.
|
|
223
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
224
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
225
225
|
}
|
|
226
226
|
|
|
227
227
|
|
|
@@ -236,57 +236,6 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
236
236
|
if __name__ == '__main__':
|
|
237
237
|
main()
|
|
238
238
|
|
|
239
|
-
Conduct Research in Online Convex Optimization
|
|
240
|
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
241
|
-
|
|
242
|
-
The code snippet below reproduces Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
|
|
243
|
-
|
|
244
|
-
.. code-block:: python
|
|
245
|
-
|
|
246
|
-
from functools import partial
|
|
247
|
-
|
|
248
|
-
import matplotlib.pyplot as plt
|
|
249
|
-
import noregret as nr
|
|
250
|
-
|
|
251
|
-
KERNEL = nr.FloatingPointKernel()
|
|
252
|
-
GAME = nr.RockPaperScissorsPlus(KERNEL)
|
|
253
|
-
R_type = partial(nr.MWU, learning_rate=1e-3)
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def main():
|
|
257
|
-
RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
|
|
258
|
-
BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
|
|
259
|
-
|
|
260
|
-
nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
|
|
261
|
-
nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
|
|
262
|
-
x, _ = nr.linear_programming(GAME)
|
|
263
|
-
|
|
264
|
-
strategies = KERNEL.numpy.array(RM.strategies)
|
|
265
|
-
|
|
266
|
-
plt.clf()
|
|
267
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
268
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
269
|
-
plt.plot(*x[:2], 'ro')
|
|
270
|
-
plt.xlabel('Probability of action 1')
|
|
271
|
-
plt.ylabel('Probability of action 2')
|
|
272
|
-
plt.title('No-external regret dynamics')
|
|
273
|
-
plt.show()
|
|
274
|
-
|
|
275
|
-
strategies = KERNEL.numpy.array(BM_RM.strategies)
|
|
276
|
-
|
|
277
|
-
plt.clf()
|
|
278
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
279
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
280
|
-
plt.plot(*x[:2], 'ro')
|
|
281
|
-
plt.xlabel('Probability of action 1')
|
|
282
|
-
plt.ylabel('Probability of action 2')
|
|
283
|
-
plt.title('No-swap regret dynamics')
|
|
284
|
-
plt.show()
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
if __name__ == '__main__':
|
|
288
|
-
main()
|
|
289
|
-
|
|
290
239
|
Testing and Validation
|
|
291
240
|
----------------------
|
|
292
241
|
|
|
@@ -44,8 +44,8 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
44
44
|
KERNEL = nr.FloatingPointKernel()
|
|
45
45
|
GAMES = {
|
|
46
46
|
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
47
|
-
'Kuhn poker': nr.
|
|
48
|
-
'Leduc poker': nr.
|
|
47
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
48
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
49
49
|
}
|
|
50
50
|
PARAMETERS = {
|
|
51
51
|
'CFR': (nr.CFR, False, False),
|
|
@@ -130,7 +130,7 @@ The code snippet below demonstrates how one can solve games while leveraging GPU
|
|
|
130
130
|
import noregret as nr
|
|
131
131
|
|
|
132
132
|
KERNEL = nr.CUDAKernel()
|
|
133
|
-
GAME = nr.
|
|
133
|
+
GAME = nr.to_efg(KERNEL, nr.from_open_spiel('liars_dice'))
|
|
134
134
|
PARAMETERS = nr.CFR, True, False
|
|
135
135
|
|
|
136
136
|
|
|
@@ -170,8 +170,8 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
170
170
|
KERNEL = nr.FloatingPointKernel()
|
|
171
171
|
GAMES = {
|
|
172
172
|
'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
|
|
173
|
-
'Kuhn poker': nr.
|
|
174
|
-
'Leduc poker': nr.
|
|
173
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
174
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
175
175
|
}
|
|
176
176
|
|
|
177
177
|
|
|
@@ -186,57 +186,6 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
186
186
|
if __name__ == '__main__':
|
|
187
187
|
main()
|
|
188
188
|
|
|
189
|
-
Conduct Research in Online Convex Optimization
|
|
190
|
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
191
|
-
|
|
192
|
-
The code snippet below reproduces Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
|
|
193
|
-
|
|
194
|
-
.. code-block:: python
|
|
195
|
-
|
|
196
|
-
from functools import partial
|
|
197
|
-
|
|
198
|
-
import matplotlib.pyplot as plt
|
|
199
|
-
import noregret as nr
|
|
200
|
-
|
|
201
|
-
KERNEL = nr.FloatingPointKernel()
|
|
202
|
-
GAME = nr.RockPaperScissorsPlus(KERNEL)
|
|
203
|
-
R_type = partial(nr.MWU, learning_rate=1e-3)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def main():
|
|
207
|
-
RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
|
|
208
|
-
BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
|
|
209
|
-
|
|
210
|
-
nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
|
|
211
|
-
nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
|
|
212
|
-
x, _ = nr.linear_programming(GAME)
|
|
213
|
-
|
|
214
|
-
strategies = KERNEL.numpy.array(RM.strategies)
|
|
215
|
-
|
|
216
|
-
plt.clf()
|
|
217
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
218
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
219
|
-
plt.plot(*x[:2], 'ro')
|
|
220
|
-
plt.xlabel('Probability of action 1')
|
|
221
|
-
plt.ylabel('Probability of action 2')
|
|
222
|
-
plt.title('No-external regret dynamics')
|
|
223
|
-
plt.show()
|
|
224
|
-
|
|
225
|
-
strategies = KERNEL.numpy.array(BM_RM.strategies)
|
|
226
|
-
|
|
227
|
-
plt.clf()
|
|
228
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
229
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
230
|
-
plt.plot(*x[:2], 'ro')
|
|
231
|
-
plt.xlabel('Probability of action 1')
|
|
232
|
-
plt.ylabel('Probability of action 2')
|
|
233
|
-
plt.title('No-swap regret dynamics')
|
|
234
|
-
plt.show()
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
if __name__ == '__main__':
|
|
238
|
-
main()
|
|
239
|
-
|
|
240
189
|
Testing and Validation
|
|
241
190
|
----------------------
|
|
242
191
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
from noregret.games import (
|
|
3
3
|
AssuranceGame,
|
|
4
4
|
BattleOfTheSexes,
|
|
5
|
+
BlackBoxGame,
|
|
5
6
|
Chicken,
|
|
6
7
|
ExtensiveFormGame,
|
|
7
8
|
from_open_spiel,
|
|
@@ -36,6 +37,7 @@ from noregret.kernels import (
|
|
|
36
37
|
from noregret.regret_minimizers import (
|
|
37
38
|
BlumMansour,
|
|
38
39
|
CounterfactualRegretMinimization,
|
|
40
|
+
CounterfactualRegretMinimization2,
|
|
39
41
|
CounterfactualRegretMinimizationPlus,
|
|
40
42
|
DiscountedCounterfactualRegretMinimization,
|
|
41
43
|
DiscountedRegretMatching,
|
|
@@ -65,6 +67,8 @@ BM = BlumMansour
|
|
|
65
67
|
"""Alias for :class:`noregret.BlumMansour`."""
|
|
66
68
|
CFR = CounterfactualRegretMinimization
|
|
67
69
|
"""Alias for :class:`noregret.CounterfactualRegretMinimization`."""
|
|
70
|
+
CFR2 = CounterfactualRegretMinimization2
|
|
71
|
+
"""Alias for :class:`noregret.CounterfactualRegretMinimization2`."""
|
|
68
72
|
CFR_plus = CounterfactualRegretMinimizationPlus
|
|
69
73
|
"""Alias for :class:`noregret.CounterfactualRegretMinimizationPlus`."""
|
|
70
74
|
DCFR = DiscountedCounterfactualRegretMinimization
|
|
@@ -111,12 +115,14 @@ to_efg = to_extensive_form
|
|
|
111
115
|
__all__ = (
|
|
112
116
|
'AssuranceGame',
|
|
113
117
|
'BattleOfTheSexes',
|
|
118
|
+
'BlackBoxGame',
|
|
114
119
|
'BlumMansour',
|
|
115
120
|
'BM',
|
|
116
121
|
'CFR',
|
|
117
122
|
'CFR_plus',
|
|
118
123
|
'Chicken',
|
|
119
124
|
'CounterfactualRegretMinimization',
|
|
125
|
+
'CounterfactualRegretMinimization2',
|
|
120
126
|
'CounterfactualRegretMinimizationPlus',
|
|
121
127
|
'CUDAKernel',
|
|
122
128
|
'DCFR',
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Module for games."""
|
|
2
|
+
from noregret.games.black_box import BlackBoxGame, from_open_spiel
|
|
2
3
|
from noregret.games.extensive_form import (
|
|
3
4
|
ExtensiveFormGame,
|
|
4
5
|
TwoPlayerExtensiveFormGame,
|
|
@@ -26,11 +27,12 @@ from noregret.games.normal_form import (
|
|
|
26
27
|
TwoPlayerNormalFormGame,
|
|
27
28
|
TwoPlayerZeroSumNormalFormGame,
|
|
28
29
|
)
|
|
29
|
-
from noregret.games.utilities import
|
|
30
|
+
from noregret.games.utilities import to_extensive_form
|
|
30
31
|
|
|
31
32
|
__all__ = (
|
|
32
33
|
'AssuranceGame',
|
|
33
34
|
'BattleOfTheSexes',
|
|
35
|
+
'BlackBoxGame',
|
|
34
36
|
'Chicken',
|
|
35
37
|
'ExtensiveFormGame',
|
|
36
38
|
'from_open_spiel',
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Module for black box games."""
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from functools import partial
|
|
5
|
+
|
|
6
|
+
from ordered_set import OrderedSet
|
|
7
|
+
from pyspiel import GameType, load_game
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class BlackBoxGame(ABC):
    """Abstract base class for black box games.

    A black box game is only accessible through queries on its nodes:
    the available actions, the child reached by an action, the acting
    player, the utilities, the information set, and the chance
    probabilities.
    """

    @property
    @abstractmethod
    def player_count(self):
        """Return the number of players.

        :return: Number of players.
        """

    @property
    def is_two_player(self):
        """Return whether the game is two-player.

        :return: Whether the game is two-player.
        """
        return self.player_count == 2

    @property
    @abstractmethod
    def is_zero_sum(self):
        """Return whether the game is zero-sum.

        :return: Whether the game is zero-sum.
        """

    @property
    @abstractmethod
    def root_node(self):
        """Return the root node.

        :return: Root node.
        """

    @abstractmethod
    def actions(self, node):
        """Return the actions given a node.

        :param node: Node.
        :return: Actions.
        """

    @abstractmethod
    def apply(self, node, action):
        """Return the child node given a node and an action.

        :param node: Node.
        :param action: Action.
        :return: Child node.
        """

    def children(self, node):
        """Return the children given a node.

        The children are listed in the order of :meth:`actions`.

        :param node: Node.
        :return: Children.
        """
        return list(map(partial(self.apply, node), self.actions(node)))

    def actions_and_children(self, node):
        """Return the actions and children given a node.

        The actions are queried once and reused to derive the children,
        so both results are aligned element by element.

        :param node: Node.
        :return: Actions and children.
        """
        A = self.actions(node)

        return A, list(map(partial(self.apply, node), A))

    @abstractmethod
    def player(self, node):
        """Return the player given a node.

        :param node: Node.
        :return: Player.
        """

    @abstractmethod
    def utility(self, player, node):
        """Return the utility given a player and a node.

        :param player: Player.
        :param node: Node.
        :return: Utility.
        """

    def utilities(self, node):
        """Return the utilities given a node.

        :param node: Node.
        :return: Utilities, one per player, ordered by player index.
        """
        P = range(self.player_count)

        # ``map`` requires a callable plus an iterable; a comprehension
        # is the correct way to evaluate ``utility`` for every player.
        return [self.utility(i, node) for i in P]

    @abstractmethod
    def information_set(self, node):
        """Return the information set given a node.

        :param node: Node.
        :return: Information set.
        """

    @abstractmethod
    def chance_probability(self, node, action):
        """Return the chance probability given a node and an action.

        :param node: Node.
        :param action: Action.
        :return: Chance probability.
        """

    def chance_probabilities(self, node):
        """Return the chance probabilities given a node.

        :param node: Node.
        :return: Chance probabilities, ordered like :meth:`actions`.
        """
        A = self.actions(node)

        # See ``utilities``: a comprehension replaces the invalid
        # single-argument ``map`` call.
        return [self.chance_probability(node, a) for a in A]
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass
class _OpenSpielBlackBoxGame(BlackBoxGame):
    """Black box game backed by a game loaded through OpenSpiel.

    Nodes are the state objects produced by the loaded game, and
    actions are exposed as the strings the state assigns to its legal
    actions.
    """

    # Game string handed to ``load_game``.
    game: str
    # Game object returned by ``load_game``; set in ``__post_init__``.
    _game: object = field(init=False)

    def __post_init__(self):
        self._game = load_game(self.game)

    @property
    def player_count(self):
        return self._game.num_players()

    @property
    def is_zero_sum(self):
        return self._game.get_type().utility == GameType.Utility.ZERO_SUM

    @property
    def root_node(self):
        return self._game.new_initial_state()

    def actions(self, node):
        return OrderedSet(map(node.action_to_string, node.legal_actions()))

    def apply(self, node, action):
        return node.child(node.string_to_action(action))

    def children(self, node):
        # Works on the raw legal actions directly, skipping the string
        # round trip done by the generic implementation.
        return [node.child(a) for a in node.legal_actions()]

    def actions_and_children(self, node):
        actions = []
        children = []

        for a in node.legal_actions():
            actions.append(node.action_to_string(a))
            children.append(node.child(a))

        return actions, children

    def player(self, node):
        i = node.current_player()

        # NOTE(review): -1 is presumably OpenSpiel's chance player id;
        # it is reported as ``None`` -- confirm against pyspiel.
        return None if i == -1 else i

    def utility(self, player, node):
        return node.player_reward(player)

    def utilities(self, node):
        return node.rewards()

    def information_set(self, node):
        return node.information_state_string()

    def chance_probability(self, node, action):
        # NOTE(review): assumes ``chance_outcomes`` is ordered like
        # ``legal_actions`` so that the position of the action string
        # also indexes its outcome -- confirm against pyspiel.
        return node.chance_outcomes()[self.actions(node).index(action)][1]

    def chance_probabilities(self, node):
        return [p for _, p in node.chance_outcomes()]
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def from_open_spiel(game):
    """Wrap an OpenSpiel game as a black box game.

    :param game: Game string identifying the game in OpenSpiel.
    :return: Black box game backed by the loaded OpenSpiel game.
    """
    black_box_game = _OpenSpielBlackBoxGame(game)

    return black_box_game
|
|
@@ -39,6 +39,7 @@ class MultilinearGame(Game, ABC):
|
|
|
39
39
|
"""
|
|
40
40
|
return tuple(self.dimension(i) for i in range(self.player_count))
|
|
41
41
|
|
|
42
|
+
@property
|
|
42
43
|
def is_symmetric(self):
|
|
43
44
|
raise NotImplementedError
|
|
44
45
|
|
|
@@ -100,6 +101,7 @@ class TwoPlayerMultilinearGame(TwoPlayerGame, MultilinearGame, ABC):
|
|
|
100
101
|
"""
|
|
101
102
|
return self.payoffs[1]
|
|
102
103
|
|
|
104
|
+
@property
|
|
103
105
|
def is_symmetric(self):
|
|
104
106
|
np = self.kernel.numpy
|
|
105
107
|
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from functools import partial, singledispatch
|
|
3
|
+
from itertools import starmap
|
|
4
|
+
|
|
5
|
+
from ordered_set import OrderedSet
|
|
6
|
+
from scipy.sparse import lil_array
|
|
7
|
+
|
|
8
|
+
from noregret.games.black_box import BlackBoxGame
|
|
9
|
+
from noregret.games.extensive_form.games import (
|
|
10
|
+
ExtensiveFormGame,
|
|
11
|
+
TwoPlayerExtensiveFormGame,
|
|
12
|
+
TwoPlayerZeroSumExtensiveFormGame,
|
|
13
|
+
)
|
|
14
|
+
from noregret.games.games import Game
|
|
15
|
+
from noregret.games.normal_form.games import (
|
|
16
|
+
NormalFormGame,
|
|
17
|
+
TwoPlayerNormalFormGame,
|
|
18
|
+
TwoPlayerZeroSumNormalFormGame,
|
|
19
|
+
)
|
|
20
|
+
from noregret.sequence_form_polytopes import SequenceFormPolytope
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _nfg2efg(kernel, game, decision_points=str):
    """Convert a normal-form game into an extensive-form game.

    Each player gets a sequence-form polytope with a single decision
    point whose actions are that player's normal-form actions. The
    payoffs are copied into an array that has one extra leading entry,
    left at zero, along every player's action axis, and the result is
    stored as a sparse array.

    :param kernel: Kernel providing ``numpy``, ``scipy`` and
                   ``data_type``.
    :param game: Normal-form game.
    :param decision_points: Callable mapping a player index to the
                            label of that player's decision point.
    :return: Extensive-form game.
    """
    numpy_ = kernel.numpy
    scipy_ = kernel.scipy
    data_type = kernel.data_type
    is_zero_sum = isinstance(game, TwoPlayerZeroSumNormalFormGame)

    # Pick the most specific extensive-form class for the input game.
    if is_zero_sum:
        efg_type = TwoPlayerZeroSumExtensiveFormGame
    elif isinstance(game, TwoPlayerNormalFormGame):
        efg_type = TwoPlayerExtensiveFormGame
    else:
        efg_type = ExtensiveFormGame

    dimensions = game.dimensions
    padded_shape = tuple(n + 1 for n in dimensions)
    # Index 0 along each action axis stays zero.
    # NOTE(review): presumably that slot is the empty sequence -- confirm.
    interior = tuple(slice(1, None) for _ in dimensions)

    if is_zero_sum:
        dense = numpy_.zeros(padded_shape, data_type)
        dense[interior] = game.payoffs
    else:
        # One payoff array per player, stacked along a leading axis.
        dense = numpy_.zeros((game.player_count, *padded_shape), data_type)
        dense[(slice(None), *interior)] = game.payoffs

    # NOTE(review): in the branch above the dense array carries an extra
    # player axis; confirm the kernel's ``csr_array`` accepts that rank.
    payoffs = scipy_.sparse.csr_array(dense)

    def polytope(player, actions):
        point = decision_points(player)

        return SequenceFormPolytope(kernel, {point: actions}, {point: None})

    polytopes = tuple(starmap(polytope, enumerate(game.actions)))

    return efg_type(kernel, payoffs, polytopes)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _bbg2efg(kernel, game):
    """Convert a black box game into an extensive-form game.

    The game tree is traversed depth-first from the root node. Along
    the way, each player's decision points (information sets), their
    actions, and their parent sequences are recorded. At every leaf
    (a node without actions), the utilities accumulated along the path
    are weighted by the product of the chance probabilities on that
    path and added to the payoff of the players' sequence profile.

    :param kernel: Kernel providing ``scipy`` and ``data_type``.
    :param game: Black box game.
    :return: Two-player zero-sum extensive-form game.
    :raises NotImplementedError: If the game is not a two-player
                                 zero-sum game.
    """
    # Only two-player zero-sum games are supported. Reject anything else
    # before the (potentially expensive) traversal of the game tree.
    if not (game.is_two_player and game.is_zero_sum):
        raise NotImplementedError

    scipy = kernel.scipy
    dtype = kernel.data_type
    P = range(game.player_count)
    # Per player: decision point -> actions, in order of discovery.
    A_js = [defaultdict(OrderedSet) for _ in P]
    # Per player: decision point -> parent sequence.
    p_js = [{} for _ in P]
    # Per player: sequence profile -> chance-weighted utility.
    raw_payoffs = [defaultdict(int) for _ in P]

    def dfs(h, p, seqs, us):
        A_j, h_primes = game.actions_and_children(h)
        i = game.player(h)
        # Copy so that sibling subtrees do not share accumulated values.
        us = us.copy()

        # Utilities reported at this node are added to the running sums.
        for i_prime, v in enumerate(game.utilities(h)):
            us[i_prime] += v

        if not A_j:
            # Leaf: credit the weighted utilities to the sequence profile.
            seqs = tuple(seqs)

            for i_prime, u in enumerate(us):
                raw_payoffs[i_prime][seqs] += p * u
        elif i is None:
            # No acting player: branch on the chance probabilities.
            p_primes = game.chance_probabilities(h)

            for h_prime, p_prime in zip(h_primes, p_primes):
                dfs(h_prime, p_prime * p, seqs, us)
        else:
            j = game.information_set(h)
            p_j = seqs[i]
            p_js[i][j] = p_j

            for a, h_prime in zip(A_j, h_primes):
                # Only the acting player's last sequence changes.
                next_seqs = seqs.copy()
                next_seqs[i] = j, a

                A_js[i][j].add(a)
                dfs(h_prime, p, next_seqs, us)

    dfs(game.root_node, 1, [None for _ in P], [0 for _ in P])

    SFP = partial(SequenceFormPolytope, kernel)
    sfps = tuple(starmap(SFP, zip(A_js, p_js)))
    dimensions = tuple(sfp.column_count for sfp in sfps)
    payoffs = lil_array(dimensions, dtype=dtype)

    # The payoff matrix is filled from the first player's payoffs only.
    for seqs, u in raw_payoffs[0].items():
        indices = tuple(sfp.column(seq) for sfp, seq in zip(sfps, seqs))
        payoffs[indices] = u

    payoffs = scipy.sparse.csr_array(payoffs)

    return TwoPlayerZeroSumExtensiveFormGame(kernel, payoffs, sfps)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@singledispatch
def to_extensive_form(kernel, game):
    """Convert a given game to an extensive-form game.

    Normal-form games and black box games can be converted.

    :param kernel: Kernel used to build the extensive-form game.
    :param game: Game.
    :return: Extensive-form game.
    :raises ValueError: If the game is neither a normal-form game nor
                        a black box game.
    """
    if isinstance(game, NormalFormGame):
        return _nfg2efg(kernel, game)

    if isinstance(game, BlackBoxGame):
        return _bbg2efg(kernel, game)

    raise ValueError('unknown game')
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@to_extensive_form.register
def _(game: Game):
    """Convert a game to extensive form using the game's own kernel.

    :param game: Game exposing a ``kernel`` attribute.
    :return: Extensive-form game.
    """
    kernel = game.kernel

    return to_extensive_form(kernel, game)
|
|
@@ -19,6 +19,7 @@ from noregret.regret_minimizers.probability_simplices import (
|
|
|
19
19
|
)
|
|
20
20
|
from noregret.regret_minimizers.sequence_form_polytopes import (
|
|
21
21
|
CounterfactualRegretMinimization,
|
|
22
|
+
CounterfactualRegretMinimization2,
|
|
22
23
|
CounterfactualRegretMinimizationPlus,
|
|
23
24
|
DiscountedCounterfactualRegretMinimization,
|
|
24
25
|
SequenceFormPolytopeRegretMinimizer,
|
|
@@ -27,6 +28,7 @@ from noregret.regret_minimizers.sequence_form_polytopes import (
|
|
|
27
28
|
__all__ = (
|
|
28
29
|
'BlumMansour',
|
|
29
30
|
'CounterfactualRegretMinimization',
|
|
31
|
+
'CounterfactualRegretMinimization2',
|
|
30
32
|
'CounterfactualRegretMinimizationPlus',
|
|
31
33
|
'DiscountedCounterfactualRegretMinimization',
|
|
32
34
|
'DiscountedRegretMatching',
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/probability_simplices.py
RENAMED
|
@@ -270,6 +270,8 @@ class BlumMansour(ProbabilitySimplexSwapRegretMinimizer):
|
|
|
270
270
|
"""External regret minimizeres."""
|
|
271
271
|
|
|
272
272
|
def __post_init__(self):
|
|
273
|
+
super().__post_init__()
|
|
274
|
+
|
|
273
275
|
n = self.dimension
|
|
274
276
|
R_type = partial(self.regret_minimizer_type, self.kernel)
|
|
275
277
|
self.external_regret_minimizers = tuple(map(R_type, repeat(n, n)))
|
|
@@ -38,13 +38,18 @@ class RegretMinimizer(ABC):
|
|
|
38
38
|
"""Strategies."""
|
|
39
39
|
utilities: list[Any] = field(default_factory=list)
|
|
40
40
|
"""Utilities."""
|
|
41
|
-
|
|
41
|
+
dimension: int = field(init=False)
|
|
42
|
+
"""Dimension."""
|
|
43
|
+
_next_strategy: Any = field(default=None, init=False)
|
|
44
|
+
|
|
45
|
+
def __post_init__(self):
|
|
46
|
+
pass
|
|
42
47
|
|
|
43
48
|
@property
|
|
44
49
|
def next_strategy(self):
|
|
45
50
|
"""Return the next strategy.
|
|
46
51
|
|
|
47
|
-
:return:
|
|
52
|
+
:return: Next strategy.
|
|
48
53
|
"""
|
|
49
54
|
return self._next_strategy
|
|
50
55
|
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/sequence_form_polytopes.py
RENAMED
|
@@ -5,14 +5,14 @@ from typing import Any
|
|
|
5
5
|
from abc import ABC
|
|
6
6
|
|
|
7
7
|
from noregret.regret_minimizers.probability_simplices import (
|
|
8
|
-
|
|
8
|
+
ProbabilitySimplexRegretMinimizer,
|
|
9
9
|
RegretMatching,
|
|
10
|
-
RegretMatchingPlus,
|
|
11
10
|
)
|
|
12
11
|
from noregret.regret_minimizers.regret_minimizers import (
|
|
13
12
|
DiscountedRegretMinimizer,
|
|
14
13
|
RegretMinimizer,
|
|
15
14
|
)
|
|
15
|
+
from noregret.sequence_form_polytopes import SequenceFormPolytope
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
@dataclass
|
|
@@ -20,7 +20,7 @@ class SequenceFormPolytopeRegretMinimizer(RegretMinimizer, ABC):
|
|
|
20
20
|
"""Abstract base class for regret minimizers operating over
|
|
21
21
|
sequence-form polytopes.
|
|
22
22
|
"""
|
|
23
|
-
sequence_form_polytope:
|
|
23
|
+
sequence_form_polytope: SequenceFormPolytope
|
|
24
24
|
"""Sequence-form polytope."""
|
|
25
25
|
_: KW_ONLY
|
|
26
26
|
previous_behavioral_strategy: Any = 0.0
|
|
@@ -73,8 +73,6 @@ class SequenceFormPolytopeRegretMinimizer(RegretMinimizer, ABC):
|
|
|
73
73
|
@dataclass
|
|
74
74
|
class CounterfactualRegretMinimization(SequenceFormPolytopeRegretMinimizer):
|
|
75
75
|
"""Class for counterfactual regret minimization (CFR)."""
|
|
76
|
-
regret_minimizer_type: Any = RegretMatching
|
|
77
|
-
"""Regret minimizer type."""
|
|
78
76
|
|
|
79
77
|
def _theta(self, m):
|
|
80
78
|
np = self.kernel.numpy
|
|
@@ -99,11 +97,9 @@ class CounterfactualRegretMinimization(SequenceFormPolytopeRegretMinimizer):
|
|
|
99
97
|
|
|
100
98
|
def output(self, prediction=False):
|
|
101
99
|
theta = self._theta(prediction)
|
|
102
|
-
|
|
103
|
-
self.next_behavioral_strategy =
|
|
104
|
-
self.next_strategy = self.sequence_form_polytope.to_sequence_form(
|
|
105
|
-
self.next_behavioral_strategy,
|
|
106
|
-
)
|
|
100
|
+
b = self.sequence_form_polytope.normalize(theta)
|
|
101
|
+
self.next_behavioral_strategy = b
|
|
102
|
+
self.next_strategy = self.sequence_form_polytope.to_sequence_form(b)
|
|
107
103
|
|
|
108
104
|
return self.next_strategy
|
|
109
105
|
|
|
@@ -111,7 +107,6 @@ class CounterfactualRegretMinimization(SequenceFormPolytopeRegretMinimizer):
|
|
|
111
107
|
@dataclass
|
|
112
108
|
class CounterfactualRegretMinimizationPlus(CounterfactualRegretMinimization):
|
|
113
109
|
"""Class for counterfactual regret minimization+ (CFR+)."""
|
|
114
|
-
regret_minimizer_type: Any = RegretMatchingPlus
|
|
115
110
|
_: KW_ONLY
|
|
116
111
|
floored_cumulative_counterfactual_regrets: Any = 0.0
|
|
117
112
|
"""Floored cumulative counterfactual regrets."""
|
|
@@ -156,7 +151,6 @@ class DiscountedCounterfactualRegretMinimization(
|
|
|
156
151
|
DiscountedRegretMinimizer,
|
|
157
152
|
):
|
|
158
153
|
"""Class for discounted counterfactual regret minimization+ (DCFR)."""
|
|
159
|
-
regret_minimizer_type: Any = DiscountedRegretMatching
|
|
160
154
|
_: KW_ONLY
|
|
161
155
|
discounted_counterfactual_regrets: Any = 0.0
|
|
162
156
|
"""Discounted counterfactual regrets."""
|
|
@@ -195,3 +189,98 @@ class DiscountedCounterfactualRegretMinimization(
|
|
|
195
189
|
T = self.iteration_count
|
|
196
190
|
r[r > 0] *= T ** self.alpha / (T ** self.alpha + 1)
|
|
197
191
|
r[r < 0] *= T ** self.beta / (T ** self.beta + 1)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@dataclass
|
|
195
|
+
class CounterfactualRegretMinimization2(SequenceFormPolytopeRegretMinimizer):
|
|
196
|
+
"""Class for counterfactual regret minimization (CFR).
|
|
197
|
+
|
|
198
|
+
This is an alternative to :class:`CounterfactualRegretMinimization`.
|
|
199
|
+
|
|
200
|
+
Do **not** use this class unless it is absolutely necessary.
|
|
201
|
+
|
|
202
|
+
Main advantage: Arbitrary local regret minimizers.
|
|
203
|
+
|
|
204
|
+
Main disadvantage: **Slow** and unparallelizable.
|
|
205
|
+
"""
|
|
206
|
+
regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer] = (
|
|
207
|
+
RegretMatching
|
|
208
|
+
)
|
|
209
|
+
"""Regret minimizer type."""
|
|
210
|
+
_: KW_ONLY
|
|
211
|
+
regret_minimizers: dict[str, ProbabilitySimplexRegretMinimizer] = field(
|
|
212
|
+
default_factory=dict,
|
|
213
|
+
init=False,
|
|
214
|
+
)
|
|
215
|
+
"""Regret minimizers."""
|
|
216
|
+
|
|
217
|
+
def __post_init__(self):
|
|
218
|
+
super().__post_init__()
|
|
219
|
+
|
|
220
|
+
R_type = self.regret_minimizer_type
|
|
221
|
+
A = self.sequence_form_polytope.actions
|
|
222
|
+
J = self.sequence_form_polytope.decision_points
|
|
223
|
+
|
|
224
|
+
for j in J:
|
|
225
|
+
self.regret_minimizers[j] = R_type(self.kernel, len(A[j]))
|
|
226
|
+
|
|
227
|
+
def output(self, prediction=False):
|
|
228
|
+
np = self.kernel.numpy
|
|
229
|
+
dtype = self.kernel.data_type
|
|
230
|
+
A = self.sequence_form_polytope.actions
|
|
231
|
+
J = self.sequence_form_polytope.decision_points
|
|
232
|
+
seqs = self.sequence_form_polytope.non_empty_sequences
|
|
233
|
+
|
|
234
|
+
if prediction is False or prediction is True:
|
|
235
|
+
predictions = {j: prediction for j in J}
|
|
236
|
+
else:
|
|
237
|
+
predictions = {}
|
|
238
|
+
m = self.sequence_form_polytope.counterfactual_utilities(
|
|
239
|
+
prediction,
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
for j in J:
|
|
243
|
+
m_j = []
|
|
244
|
+
|
|
245
|
+
for a in A[j]:
|
|
246
|
+
m_j.append(m[seqs.index((j, a))])
|
|
247
|
+
|
|
248
|
+
predictions[j] = np.array(m_j, dtype)
|
|
249
|
+
|
|
250
|
+
b = np.empty(len(seqs), dtype)
|
|
251
|
+
|
|
252
|
+
for j, R in self.regret_minimizers.items():
|
|
253
|
+
x = R.output(predictions[j])
|
|
254
|
+
|
|
255
|
+
for a, p in zip(A[j], x):
|
|
256
|
+
b[seqs.index((j, a))] = p
|
|
257
|
+
|
|
258
|
+
self.next_behavioral_strategy = b
|
|
259
|
+
self.next_strategy = self.sequence_form_polytope.to_sequence_form(b)
|
|
260
|
+
|
|
261
|
+
return self.next_strategy
|
|
262
|
+
|
|
263
|
+
def observe(self, utility):
|
|
264
|
+
super().observe(utility)
|
|
265
|
+
|
|
266
|
+
np = self.kernel.numpy
|
|
267
|
+
dtype = self.kernel.data_type
|
|
268
|
+
A = self.sequence_form_polytope.actions
|
|
269
|
+
J = self.sequence_form_polytope.decision_points
|
|
270
|
+
seqs = self.sequence_form_polytope.non_empty_sequences
|
|
271
|
+
u = self.sequence_form_polytope.counterfactual_utilities(
|
|
272
|
+
self.previous_behavioral_strategy,
|
|
273
|
+
utility,
|
|
274
|
+
)
|
|
275
|
+
counterfactual_utilities = {}
|
|
276
|
+
|
|
277
|
+
for j in J:
|
|
278
|
+
u_j = []
|
|
279
|
+
|
|
280
|
+
for a in A[j]:
|
|
281
|
+
u_j.append(u[seqs.index((j, a))])
|
|
282
|
+
|
|
283
|
+
counterfactual_utilities[j] = np.array(u_j, dtype)
|
|
284
|
+
|
|
285
|
+
for j, R in self.regret_minimizers.items():
|
|
286
|
+
R.observe(counterfactual_utilities[j])
|
|
@@ -127,13 +127,13 @@ class SequenceFormPolytope:
|
|
|
127
127
|
|
|
128
128
|
:return: Non-empty sequences.
|
|
129
129
|
"""
|
|
130
|
-
|
|
130
|
+
seqs = OrderedSet()
|
|
131
131
|
|
|
132
132
|
for j in self.decision_points:
|
|
133
133
|
for a in self.actions[j]:
|
|
134
|
-
|
|
134
|
+
seqs.add((j, a))
|
|
135
135
|
|
|
136
|
-
return
|
|
136
|
+
return seqs
|
|
137
137
|
|
|
138
138
|
@property
|
|
139
139
|
def row_count(self):
|
|
@@ -37,8 +37,8 @@ class ExtensiveFormGameTestCase(TestCase):
|
|
|
37
37
|
nr.to_efg(nr.RockPaperScissors(KERNEL)),
|
|
38
38
|
nr.to_efg(nr.RockPaperScissorsPlus(KERNEL)),
|
|
39
39
|
nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
40
|
-
nr.
|
|
41
|
-
nr.
|
|
40
|
+
nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
41
|
+
nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
42
42
|
)
|
|
43
43
|
|
|
44
44
|
def test_serialization(self):
|
|
@@ -14,8 +14,8 @@ class LinearProgrammingTestCase(TestCase):
|
|
|
14
14
|
(nr.to_efg(nr.RockPaperScissors(KERNEL)), 0),
|
|
15
15
|
(nr.to_efg(nr.RockPaperScissorsPlus(KERNEL)), 0),
|
|
16
16
|
(nr.to_efg(nr.RockPaperSuperscissors(KERNEL)), 0),
|
|
17
|
-
(nr.
|
|
18
|
-
(nr.
|
|
17
|
+
(nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')), -1 / 18),
|
|
18
|
+
(nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')), -0.08560642408),
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
def test_linear_programming(self):
|
|
@@ -82,7 +82,7 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
|
|
|
82
82
|
dtype = self.KERNEL.data_type
|
|
83
83
|
|
|
84
84
|
for game, value in self.SYMMETRIC_GAME_VALUES:
|
|
85
|
-
assert game.is_symmetric
|
|
85
|
+
assert game.is_symmetric
|
|
86
86
|
assert isinstance(game, nr.NFG_2p0s)
|
|
87
87
|
|
|
88
88
|
for R_type in self.REGRET_MINIMIZER_TYPES:
|
|
@@ -110,8 +110,8 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
|
|
|
110
110
|
(nr.to_efg(nr.RockPaperScissors(KERNEL)), 0),
|
|
111
111
|
(nr.to_efg(nr.RockPaperScissorsPlus(KERNEL)), 0),
|
|
112
112
|
(nr.to_efg(nr.RockPaperSuperscissors(KERNEL)), 0),
|
|
113
|
-
(nr.
|
|
114
|
-
(nr.
|
|
113
|
+
(nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')), -1 / 18),
|
|
114
|
+
(nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')), -0.08560642408),
|
|
115
115
|
)
|
|
116
116
|
REGRET_MINIMIZION_PARAMETERS = (
|
|
117
117
|
(partial(nr.CFR, KERNEL), False, False),
|
|
@@ -150,5 +150,42 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
|
|
|
150
150
|
self.assertEqual(v.dtype, dtype)
|
|
151
151
|
|
|
152
152
|
|
|
153
|
+
class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
154
|
+
KERNEL = nr.FloatingPointKernel()
|
|
155
|
+
GAMES = (
|
|
156
|
+
nr.to_efg(nr.MatchingPennies(KERNEL)),
|
|
157
|
+
nr.to_efg(nr.RockPaperScissors(KERNEL)),
|
|
158
|
+
nr.to_efg(nr.RockPaperScissorsPlus(KERNEL)),
|
|
159
|
+
nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
160
|
+
nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
161
|
+
nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
162
|
+
)
|
|
163
|
+
PLACES = 6
|
|
164
|
+
|
|
165
|
+
def test_equivalence(self):
|
|
166
|
+
for game in self.GAMES:
|
|
167
|
+
assert isinstance(game, nr.EFG_2p0s)
|
|
168
|
+
|
|
169
|
+
x_bar, y_bar = nr.regret_minimization(
|
|
170
|
+
game,
|
|
171
|
+
nr.CFR(self.KERNEL, game.row_sequence_form_polytope),
|
|
172
|
+
nr.CFR(self.KERNEL, game.column_sequence_form_polytope),
|
|
173
|
+
progress_bar=False,
|
|
174
|
+
)
|
|
175
|
+
e = game.exploitability(x_bar, y_bar)
|
|
176
|
+
v = game.expected_row_utility(x_bar, y_bar)
|
|
177
|
+
x_bar2, y_bar2 = nr.regret_minimization(
|
|
178
|
+
game,
|
|
179
|
+
nr.CFR2(self.KERNEL, game.row_sequence_form_polytope),
|
|
180
|
+
nr.CFR2(self.KERNEL, game.column_sequence_form_polytope),
|
|
181
|
+
progress_bar=False,
|
|
182
|
+
)
|
|
183
|
+
e2 = game.exploitability(x_bar2, y_bar2)
|
|
184
|
+
v2 = game.expected_row_utility(x_bar2, y_bar2)
|
|
185
|
+
|
|
186
|
+
self.assertAlmostEqual(e, e2, self.PLACES)
|
|
187
|
+
self.assertAlmostEqual(v, v2, self.PLACES)
|
|
188
|
+
|
|
189
|
+
|
|
153
190
|
if __name__ == '__main__':
|
|
154
191
|
main() # pragma: no cover
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: noregret
|
|
3
|
-
Version: 0.0.0.
|
|
3
|
+
Version: 0.0.0.dev5
|
|
4
4
|
Summary: No-regret learning dynamics
|
|
5
5
|
Home-page: https://github.com/uoftcprg/noregret
|
|
6
6
|
Author: Universal, Open, Free, and Transparent Computer Poker Research Group
|
|
@@ -94,8 +94,8 @@ The code snippet below demonstrates how one can solve games via regret minimizat
|
|
|
94
94
|
KERNEL = nr.FloatingPointKernel()
|
|
95
95
|
GAMES = {
|
|
96
96
|
'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
|
|
97
|
-
'Kuhn poker': nr.
|
|
98
|
-
'Leduc poker': nr.
|
|
97
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
98
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
99
99
|
}
|
|
100
100
|
PARAMETERS = {
|
|
101
101
|
'CFR': (nr.CFR, False, False),
|
|
@@ -180,7 +180,7 @@ The code snippet below demonstrates how one can solve games while leveraging GPU
|
|
|
180
180
|
import noregret as nr
|
|
181
181
|
|
|
182
182
|
KERNEL = nr.CUDAKernel()
|
|
183
|
-
GAME = nr.
|
|
183
|
+
GAME = nr.to_efg(KERNEL, nr.from_open_spiel('liars_dice'))
|
|
184
184
|
PARAMETERS = nr.CFR, True, False
|
|
185
185
|
|
|
186
186
|
|
|
@@ -220,8 +220,8 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
220
220
|
KERNEL = nr.FloatingPointKernel()
|
|
221
221
|
GAMES = {
|
|
222
222
|
'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
|
|
223
|
-
'Kuhn poker': nr.
|
|
224
|
-
'Leduc poker': nr.
|
|
223
|
+
'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
|
|
224
|
+
'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
|
|
225
225
|
}
|
|
226
226
|
|
|
227
227
|
|
|
@@ -236,57 +236,6 @@ The code snippet below demonstrates how one can solve games via linear programmi
|
|
|
236
236
|
if __name__ == '__main__':
|
|
237
237
|
main()
|
|
238
238
|
|
|
239
|
-
Conduct Research in Online Convex Optimization
|
|
240
|
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
241
|
-
|
|
242
|
-
The code snippet below reproduces Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
|
|
243
|
-
|
|
244
|
-
.. code-block:: python
|
|
245
|
-
|
|
246
|
-
from functools import partial
|
|
247
|
-
|
|
248
|
-
import matplotlib.pyplot as plt
|
|
249
|
-
import noregret as nr
|
|
250
|
-
|
|
251
|
-
KERNEL = nr.FloatingPointKernel()
|
|
252
|
-
GAME = nr.RockPaperScissorsPlus(KERNEL)
|
|
253
|
-
R_type = partial(nr.MWU, learning_rate=1e-3)
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def main():
|
|
257
|
-
RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
|
|
258
|
-
BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
|
|
259
|
-
|
|
260
|
-
nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
|
|
261
|
-
nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
|
|
262
|
-
x, _ = nr.linear_programming(GAME)
|
|
263
|
-
|
|
264
|
-
strategies = KERNEL.numpy.array(RM.strategies)
|
|
265
|
-
|
|
266
|
-
plt.clf()
|
|
267
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
268
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
269
|
-
plt.plot(*x[:2], 'ro')
|
|
270
|
-
plt.xlabel('Probability of action 1')
|
|
271
|
-
plt.ylabel('Probability of action 2')
|
|
272
|
-
plt.title('No-external regret dynamics')
|
|
273
|
-
plt.show()
|
|
274
|
-
|
|
275
|
-
strategies = KERNEL.numpy.array(BM_RM.strategies)
|
|
276
|
-
|
|
277
|
-
plt.clf()
|
|
278
|
-
plt.plot(strategies[:, 0], strategies[:, 1])
|
|
279
|
-
plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
|
|
280
|
-
plt.plot(*x[:2], 'ro')
|
|
281
|
-
plt.xlabel('Probability of action 1')
|
|
282
|
-
plt.ylabel('Probability of action 2')
|
|
283
|
-
plt.title('No-swap regret dynamics')
|
|
284
|
-
plt.show()
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
if __name__ == '__main__':
|
|
288
|
-
main()
|
|
289
|
-
|
|
290
239
|
Testing and Validation
|
|
291
240
|
----------------------
|
|
292
241
|
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
from collections import defaultdict
|
|
2
|
-
from functools import partial
|
|
3
|
-
from itertools import starmap
|
|
4
|
-
|
|
5
|
-
from ordered_set import OrderedSet
|
|
6
|
-
from pyspiel import GameType, load_game
|
|
7
|
-
from scipy.sparse import lil_array
|
|
8
|
-
|
|
9
|
-
from noregret.games.normal_form.games import (
|
|
10
|
-
NormalFormGame,
|
|
11
|
-
TwoPlayerNormalFormGame,
|
|
12
|
-
TwoPlayerZeroSumNormalFormGame,
|
|
13
|
-
)
|
|
14
|
-
from noregret.games.extensive_form.games import (
|
|
15
|
-
ExtensiveFormGame,
|
|
16
|
-
TwoPlayerExtensiveFormGame,
|
|
17
|
-
TwoPlayerZeroSumExtensiveFormGame,
|
|
18
|
-
)
|
|
19
|
-
from noregret.sequence_form_polytopes import SequenceFormPolytope
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def _nfg2efg(game, decision_points):
|
|
23
|
-
kernel = game.kernel
|
|
24
|
-
np = kernel.numpy
|
|
25
|
-
scipy = kernel.scipy
|
|
26
|
-
dtype = kernel.data_type
|
|
27
|
-
|
|
28
|
-
if isinstance(game, TwoPlayerZeroSumNormalFormGame):
|
|
29
|
-
type_ = TwoPlayerZeroSumExtensiveFormGame
|
|
30
|
-
elif isinstance(game, TwoPlayerNormalFormGame):
|
|
31
|
-
type_ = TwoPlayerExtensiveFormGame
|
|
32
|
-
else:
|
|
33
|
-
type_ = ExtensiveFormGame
|
|
34
|
-
|
|
35
|
-
d = game.dimensions
|
|
36
|
-
|
|
37
|
-
if isinstance(game, TwoPlayerZeroSumNormalFormGame):
|
|
38
|
-
payoffs = np.zeros(tuple(n + 1 for n in d), dtype)
|
|
39
|
-
payoffs[tuple(slice(1, None) for _ in d)] = game.payoffs
|
|
40
|
-
else:
|
|
41
|
-
payoffs = np.zeros((game.player_count, *(n + 1 for n in d)), dtype)
|
|
42
|
-
payoffs[:, *(slice(1, None) for _ in d)] = game.payoffs
|
|
43
|
-
|
|
44
|
-
payoffs = scipy.sparse.csr_array(payoffs)
|
|
45
|
-
sequence_form_polytopes = []
|
|
46
|
-
|
|
47
|
-
for i, A_j in enumerate(game.actions):
|
|
48
|
-
j = decision_points(i)
|
|
49
|
-
sfp = SequenceFormPolytope(kernel, {j: A_j}, {j: None})
|
|
50
|
-
|
|
51
|
-
sequence_form_polytopes.append(sfp)
|
|
52
|
-
|
|
53
|
-
sequence_form_polytopes = tuple(sequence_form_polytopes)
|
|
54
|
-
|
|
55
|
-
return type_(kernel, payoffs, sequence_form_polytopes)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def to_extensive_form(game, decision_points=str):
|
|
59
|
-
"""Convert a given game to an extensive-form game.
|
|
60
|
-
|
|
61
|
-
:param game: Game.
|
|
62
|
-
:param decision_points: Decision points, defaults to ``str''.
|
|
63
|
-
:return: Extensive-form game.
|
|
64
|
-
"""
|
|
65
|
-
if isinstance(game, NormalFormGame):
|
|
66
|
-
game = _nfg2efg(game, decision_points)
|
|
67
|
-
else:
|
|
68
|
-
raise ValueError('unknown game')
|
|
69
|
-
|
|
70
|
-
return game
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def from_open_spiel(kernel, game):
|
|
74
|
-
"""Load a game from OpenSpiel.
|
|
75
|
-
|
|
76
|
-
:param kernel: Kernel.
|
|
77
|
-
:param game: Game in OpenSpiel.
|
|
78
|
-
:return: Game.
|
|
79
|
-
"""
|
|
80
|
-
dtype = kernel.data_type
|
|
81
|
-
scipy = kernel.scipy
|
|
82
|
-
game = load_game(game)
|
|
83
|
-
player_count = game.num_players()
|
|
84
|
-
actions = [defaultdict(OrderedSet) for _ in range(player_count)]
|
|
85
|
-
parent_sequences = [{} for _ in range(player_count)]
|
|
86
|
-
raw_payoffs = [defaultdict(int) for _ in range(player_count)]
|
|
87
|
-
|
|
88
|
-
def dfs(state, chance_probability, sequences):
|
|
89
|
-
if state.is_terminal():
|
|
90
|
-
key = tuple(sequences)
|
|
91
|
-
|
|
92
|
-
for i, u in enumerate(state.rewards()):
|
|
93
|
-
raw_payoffs[i][key] += chance_probability * u
|
|
94
|
-
elif state.is_chance_node():
|
|
95
|
-
for a, p in state.chance_outcomes():
|
|
96
|
-
dfs(state.child(a), p * chance_probability, sequences)
|
|
97
|
-
else:
|
|
98
|
-
i = state.current_player()
|
|
99
|
-
j = state.information_state_string()
|
|
100
|
-
p_j = sequences[i]
|
|
101
|
-
parent_sequences[i][j] = p_j
|
|
102
|
-
|
|
103
|
-
for a in state.legal_actions():
|
|
104
|
-
next_state = state.child(a)
|
|
105
|
-
a = state.action_to_string(a)
|
|
106
|
-
next_sequences = sequences.copy()
|
|
107
|
-
next_sequences[i] = j, a
|
|
108
|
-
|
|
109
|
-
actions[i][j].add(a)
|
|
110
|
-
dfs(next_state, chance_probability, next_sequences)
|
|
111
|
-
|
|
112
|
-
dfs(game.new_initial_state(), 1, [None] * player_count)
|
|
113
|
-
|
|
114
|
-
sequence_form_polytopes = tuple(
|
|
115
|
-
starmap(
|
|
116
|
-
partial(SequenceFormPolytope, kernel),
|
|
117
|
-
zip(actions, parent_sequences),
|
|
118
|
-
),
|
|
119
|
-
)
|
|
120
|
-
dimensions = tuple(sfp.column_count for sfp in sequence_form_polytopes)
|
|
121
|
-
|
|
122
|
-
if (
|
|
123
|
-
player_count == 2
|
|
124
|
-
and game.get_type().utility == GameType.Utility.ZERO_SUM
|
|
125
|
-
):
|
|
126
|
-
type_ = TwoPlayerZeroSumExtensiveFormGame
|
|
127
|
-
payoffs = lil_array(dimensions, dtype=dtype)
|
|
128
|
-
|
|
129
|
-
for sequences, payoff in raw_payoffs[0].items():
|
|
130
|
-
indices = []
|
|
131
|
-
|
|
132
|
-
for sfp, sequence in zip(sequence_form_polytopes, sequences):
|
|
133
|
-
indices.append(sfp.column(sequence))
|
|
134
|
-
|
|
135
|
-
payoffs[tuple(indices)] = payoff
|
|
136
|
-
|
|
137
|
-
payoffs = scipy.sparse.csr_array(payoffs)
|
|
138
|
-
else:
|
|
139
|
-
raise NotImplementedError
|
|
140
|
-
|
|
141
|
-
return type_(kernel, payoffs, sequence_form_polytopes)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/battle-of-the-sexes.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/gift-exchange-game.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/matching-pennies.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/prisoners-dilemma.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/pure-coordination.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-scissors-plus.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-scissors.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-superscissors.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|