noregret 0.0.0.dev8__tar.gz → 0.0.0.dev10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/PKG-INFO +1 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/__init__.py +23 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/__init__.py +10 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/black_box.py +134 -9
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/extensive_form.py +5 -5
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/__init__.py +6 -0
- noregret-0.0.0.dev10/noregret/regret_minimizers/stochastic.py +201 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/solvers/__init__.py +2 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/solvers/regret_minimization.py +83 -22
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/test_games.py +55 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/test_linear_programming.py +3 -3
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/test_regret_minimization.py +71 -18
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/PKG-INFO +1 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/SOURCES.txt +1 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/setup.py +1 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/LICENSE +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/README.rst +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/assurance-game.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/battle-of-the-sexes.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/chicken.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/gift-exchange-game.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/matching-pennies.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/prisoners-dilemma.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/pure-coordination.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-scissors-plus.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-scissors.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-superscissors.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/stag-hunt.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/games.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/multilinear.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/normal_form.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/kernels.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/probability_simplices.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/regret_minimizers.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/solvers/linear_programming.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/__init__.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/test_sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/utilities.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/requires.txt +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/setup.cfg +0 -0
|
@@ -17,7 +17,9 @@ from noregret.games import (
|
|
|
17
17
|
RockPaperScissors,
|
|
18
18
|
RockPaperScissorsPlus,
|
|
19
19
|
RockPaperSuperscissors,
|
|
20
|
+
Simulation,
|
|
20
21
|
StagHunt,
|
|
22
|
+
StrategyProfile,
|
|
21
23
|
to_extensive_form_game,
|
|
22
24
|
TwoPlayerExtensiveFormGame,
|
|
23
25
|
TwoPlayerGame,
|
|
@@ -27,6 +29,7 @@ from noregret.games import (
|
|
|
27
29
|
TwoPlayerZeroSumGame,
|
|
28
30
|
TwoPlayerZeroSumMultilinearGame,
|
|
29
31
|
TwoPlayerZeroSumNormalFormGame,
|
|
32
|
+
UniformStrategyProfile,
|
|
30
33
|
)
|
|
31
34
|
from noregret.kernels import (
|
|
32
35
|
CUDAKernel,
|
|
@@ -46,6 +49,7 @@ from noregret.regret_minimizers import (
|
|
|
46
49
|
EuclideanRegularization,
|
|
47
50
|
FollowTheRegularizedLeader,
|
|
48
51
|
MirrorDescent,
|
|
52
|
+
MonteCarloCounterfactualRegretMinimization,
|
|
49
53
|
MultiplicativeWeightsUpdate,
|
|
50
54
|
OnlineGradientDescent,
|
|
51
55
|
ProbabilitySimplexRegretMinimizer,
|
|
@@ -54,12 +58,14 @@ from noregret.regret_minimizers import (
|
|
|
54
58
|
RegretMatchingPlus,
|
|
55
59
|
RegretMinimizer,
|
|
56
60
|
SequenceFormPolytopeRegretMinimizer,
|
|
61
|
+
StochasticRegretMinimizer,
|
|
57
62
|
SwapRegretMinimizer,
|
|
58
63
|
)
|
|
59
64
|
from noregret.sequence_form_polytopes import SequenceFormPolytope
|
|
60
65
|
from noregret.solvers import (
|
|
61
66
|
linear_programming,
|
|
62
67
|
regret_minimization,
|
|
68
|
+
stochastic_regret_minimization,
|
|
63
69
|
symmetric_regret_minimization,
|
|
64
70
|
)
|
|
65
71
|
from noregret.utilities import import_object, tuple_or_none
|
|
@@ -94,6 +100,10 @@ FTRL = FollowTheRegularizedLeader
|
|
|
94
100
|
"""Alias for :class:`noregret.FollowTheRegularizedLeader`."""
|
|
95
101
|
lp = linear_programming
|
|
96
102
|
"""Alias for :func:`noregret.linear_programming`."""
|
|
103
|
+
MCCFR = MonteCarloCounterfactualRegretMinimization
|
|
104
|
+
"""Alias for
|
|
105
|
+
:class:`noregret.MonteCarloCounterfactualRegretMinimization`.
|
|
106
|
+
"""
|
|
97
107
|
MD = MirrorDescent
|
|
98
108
|
"""Alias for :class:`noregret.MirrorDescent`."""
|
|
99
109
|
MWU = MultiplicativeWeightsUpdate
|
|
@@ -112,6 +122,10 @@ RM = RegretMatching
|
|
|
112
122
|
"""Alias for :class:`noregret.RegretMatching`."""
|
|
113
123
|
rm = regret_minimization
|
|
114
124
|
"""Alias for :func:`noregret.regret_minimization`."""
|
|
125
|
+
Sim = Simulation
|
|
126
|
+
"""Alias for :class:`noregret.Simulation`."""
|
|
127
|
+
stochastic_rm = stochastic_regret_minimization
|
|
128
|
+
"""Alias for :func:`noregret.stochastic_regret_minimization`."""
|
|
115
129
|
symmetric_rm = symmetric_regret_minimization
|
|
116
130
|
"""Alias for :func:`noregret.symmetric_regret_minimization`."""
|
|
117
131
|
to_efg = to_extensive_form_game
|
|
@@ -155,8 +169,10 @@ __all__ = (
|
|
|
155
169
|
'lp',
|
|
156
170
|
'MatchingPennies',
|
|
157
171
|
'matrix_game',
|
|
172
|
+
'MCCFR',
|
|
158
173
|
'MD',
|
|
159
174
|
'MirrorDescent',
|
|
175
|
+
'MonteCarloCounterfactualRegretMinimization',
|
|
160
176
|
'MultilinearGame',
|
|
161
177
|
'MultiplicativeWeightsUpdate',
|
|
162
178
|
'MWU',
|
|
@@ -184,7 +200,13 @@ __all__ = (
|
|
|
184
200
|
'SequenceFormPolytope',
|
|
185
201
|
'SequenceFormPolytopeRegretMinimizer',
|
|
186
202
|
'Serializable',
|
|
203
|
+
'Sim',
|
|
204
|
+
'Simulation',
|
|
187
205
|
'StagHunt',
|
|
206
|
+
'stochastic_regret_minimization',
|
|
207
|
+
'StochasticRegretMinimizer',
|
|
208
|
+
'stochastic_rm',
|
|
209
|
+
'StrategyProfile',
|
|
188
210
|
'SwapRegretMinimizer',
|
|
189
211
|
'symmetric_regret_minimization',
|
|
190
212
|
'symmetric_rm',
|
|
@@ -199,4 +221,5 @@ __all__ = (
|
|
|
199
221
|
'TwoPlayerZeroSumGame',
|
|
200
222
|
'TwoPlayerZeroSumMultilinearGame',
|
|
201
223
|
'TwoPlayerZeroSumNormalFormGame',
|
|
224
|
+
'UniformStrategyProfile',
|
|
202
225
|
)
|
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
"""Module for games."""
|
|
2
|
-
from noregret.games.black_box import
|
|
2
|
+
from noregret.games.black_box import (
|
|
3
|
+
BlackBoxGame,
|
|
4
|
+
open_spiel_game,
|
|
5
|
+
Simulation,
|
|
6
|
+
StrategyProfile,
|
|
7
|
+
UniformStrategyProfile,
|
|
8
|
+
)
|
|
3
9
|
from noregret.games.extensive_form import (
|
|
4
10
|
ExtensiveFormGame,
|
|
5
11
|
to_extensive_form_game,
|
|
@@ -48,7 +54,9 @@ __all__ = (
|
|
|
48
54
|
'RockPaperScissors',
|
|
49
55
|
'RockPaperScissorsPlus',
|
|
50
56
|
'RockPaperSuperscissors',
|
|
57
|
+
'Simulation',
|
|
51
58
|
'StagHunt',
|
|
59
|
+
'StrategyProfile',
|
|
52
60
|
'to_extensive_form_game',
|
|
53
61
|
'TwoPlayerExtensiveFormGame',
|
|
54
62
|
'TwoPlayerGame',
|
|
@@ -58,4 +66,5 @@ __all__ = (
|
|
|
58
66
|
'TwoPlayerZeroSumGame',
|
|
59
67
|
'TwoPlayerZeroSumMultilinearGame',
|
|
60
68
|
'TwoPlayerZeroSumNormalFormGame',
|
|
69
|
+
'UniformStrategyProfile',
|
|
61
70
|
)
|
|
@@ -2,13 +2,47 @@
|
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
4
|
from functools import partial
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
6
7
|
from ordered_set import OrderedSet
|
|
7
|
-
from pyspiel import GameType, load_game
|
|
8
|
+
from pyspiel import exploitability, GameType, load_game
|
|
8
9
|
|
|
9
10
|
from noregret.kernels import Kernel
|
|
10
11
|
|
|
11
12
|
|
|
13
|
+
@dataclass
class Simulation:
    """Class for simulations.

    The ``players``, ``decision_points`` and ``actions`` fields are
    parallel: entry ``k`` of each describes the ``k``-th step of the
    simulation.
    """
    kernel: Kernel
    """Kernel."""
    players: list[int | None]
    """Players. Steps whose player is ``None`` are skipped by
    :meth:`sequences`.
    """
    decision_points: list[str | None]
    """Decision points."""
    actions: list[str]
    """Actions."""
    utilities: Any
    """Utilities."""

    def sequences(self, player=None):
        """Return sequences given an optional player.

        A sequence is a ``(decision point, action)`` pair. Steps whose
        player is ``None`` never produce a sequence.

        :param player: Optional player. If ``None``, the sequences of
                       all players are yielded.
        :return: Sequences, lazily, in step order.
        """
        steps = zip(self.players, self.decision_points, self.actions)

        for i, j, a in steps:
            if i is None:
                continue

            if player is None or i == player:
                yield j, a

    def utility(self, player):
        """Return the utility given a player.

        :param player: Player, used as an index into the utilities.
        :return: Utility.
        """
        return self.utilities[player]
|
|
44
|
+
|
|
45
|
+
|
|
12
46
|
@dataclass
|
|
13
47
|
class BlackBoxGame(ABC):
|
|
14
48
|
"""Abstract base class for black box games."""
|
|
@@ -139,6 +173,54 @@ class BlackBoxGame(ABC):
|
|
|
139
173
|
|
|
140
174
|
return np.array(ps, dtype)
|
|
141
175
|
|
|
176
|
+
def exploitability(self, strategy_profile):
    """Return exploitability given a strategy profile.

    This implementation only validates the kind of game and then
    refuses to compute anything.

    :param strategy_profile: Strategy profile.
    :return: Exploitability.
    :raises ValueError: If the game is not two-player zero-sum.
    :raises NotImplementedError: If the game is two-player zero-sum.
    """
    if not (self.is_two_player and self.is_zero_sum):
        raise ValueError('not 2p0s')

    raise NotImplementedError
|
|
186
|
+
|
|
187
|
+
def simulate(self, strategy_profile):
    """Run a simulation given a strategy profile.

    Starting at the root node, one action is drawn at every node that
    still has actions: from the chance probabilities when the node has
    no player, and from the strategy profile otherwise.

    :param strategy_profile: Strategy profile, called with a node to
                             obtain the action probabilities.
    :return: Simulation.
    """
    np = self.kernel.numpy
    players = []
    decision_points = []
    actions = []
    node = self.root_node

    while legal_actions := self.actions(node):
        player = self.player(node)

        if player is not None:
            decision_point = self.information_set(node)
            probabilities = strategy_profile(node)
        else:
            # No player: the step has no decision point.
            decision_point = None
            probabilities = self.chance_probabilities(node)

        action = np.random.choice(legal_actions, p=probabilities).item()
        node = self.apply(node, action)

        players.append(player)
        decision_points.append(decision_point)
        actions.append(action)

    # The loop ended on a node without actions; read its utilities.
    return Simulation(
        self.kernel,
        tuple(players),
        tuple(decision_points),
        tuple(actions),
        self.utilities(node),
    )
|
|
223
|
+
|
|
142
224
|
|
|
143
225
|
@dataclass
|
|
144
226
|
class _OpenSpielBlackBoxGame(BlackBoxGame):
|
|
@@ -167,17 +249,14 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
|
|
|
167
249
|
return node.child(node.string_to_action(action))
|
|
168
250
|
|
|
169
251
|
def children(self, node):
|
|
170
|
-
return list(node.child
|
|
252
|
+
return list(map(node.child, node.legal_actions()))
|
|
171
253
|
|
|
172
254
|
def actions_and_children(self, node):
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
for a in node.legal_actions():
|
|
177
|
-
actions.append(node.action_to_string(a))
|
|
178
|
-
children.append(node.child(a))
|
|
255
|
+
A = node.legal_actions()
|
|
256
|
+
actions = OrderedSet(map(node.action_to_string, A))
|
|
257
|
+
children = list(map(node.child, A))
|
|
179
258
|
|
|
180
|
-
return
|
|
259
|
+
return actions, children
|
|
181
260
|
|
|
182
261
|
def player(self, node):
|
|
183
262
|
i = node.current_player()
|
|
@@ -212,6 +291,27 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
|
|
|
212
291
|
|
|
213
292
|
return np.array([p for _, p in node.chance_outcomes()], dtype)
|
|
214
293
|
|
|
294
|
+
def _sigma(self, strategy_profile, h, sigma):
    """Record the action probabilities at a node and its descendants.

    Each information set is recorded once, the first time one of its
    nodes is reached, as a list of ``(action, probability)`` pairs.

    :param strategy_profile: Strategy profile.
    :param h: Node.
    :param sigma: Dictionary to fill, keyed by information set.
    :return: ``None``.
    """
    legal_actions = h.legal_actions()
    children = [h.child(action) for action in legal_actions]
    player = self.player(h)

    if legal_actions and player is not None:
        information_set = self.information_set(h)

        if information_set not in sigma:
            probabilities = strategy_profile(h).tolist()
            sigma[information_set] = list(zip(legal_actions, probabilities))

    for child in children:
        self._sigma(strategy_profile, child, sigma)
|
|
304
|
+
|
|
305
|
+
def _sigma2(self, strategy_profile):
    """Return the action probabilities of every information set.

    :param strategy_profile: Strategy profile.
    :return: Dictionary filled by :meth:`_sigma` from the root node.
    """
    policy = {}
    self._sigma(strategy_profile, self.root_node, policy)

    return policy
|
|
311
|
+
|
|
312
|
+
def exploitability(self, strategy_profile):
    """Return exploitability given a strategy profile.

    The strategy profile is first tabulated per information set and
    then handed, with the wrapped game, to the module-level
    ``exploitability`` function.

    :param strategy_profile: Strategy profile.
    :return: Exploitability.
    """
    policy = self._sigma2(strategy_profile)

    return exploitability(self._game, policy)
|
|
314
|
+
|
|
215
315
|
|
|
216
316
|
def open_spiel_game(kernel, game):
|
|
217
317
|
"""Load a game from OpenSpiel.
|
|
@@ -221,3 +321,28 @@ def open_spiel_game(kernel, game):
|
|
|
221
321
|
:return: Game.
|
|
222
322
|
"""
|
|
223
323
|
return _OpenSpielBlackBoxGame(kernel, game)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
@dataclass
class StrategyProfile(ABC):
    """Abstract base class for strategy profiles.

    A strategy profile is a callable over nodes; subclasses implement
    :meth:`__call__`.
    """
    kernel: Kernel
    """Kernel."""
    game: BlackBoxGame
    """Game."""

    @abstractmethod
    def __call__(self, node):
        """Return the action probabilities at a node.

        :param node: Node.
        :return: Action probabilities.
        """
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
@dataclass
class UniformStrategyProfile(StrategyProfile):
    """Class for uniform strategy profiles.

    Every action available at a node receives the same probability.
    """

    def __call__(self, node):
        """Return the uniform action probabilities at a node.

        :param node: Node.
        :return: Array with one equal entry per action of the node.
        """
        action_count = len(self.game.actions(node))

        return self.kernel.numpy.full(
            action_count,
            1 / action_count,
            self.kernel.data_type,
        )
|
|
@@ -178,9 +178,9 @@ def _nfg2efg(ker, game, decision_points='p{}'.format):
|
|
|
178
178
|
payoffs = scipy.sparse.csr_array(payoffs)
|
|
179
179
|
sfps = []
|
|
180
180
|
|
|
181
|
-
for i,
|
|
181
|
+
for i, A in enumerate(game.actions):
|
|
182
182
|
j = decision_points(i)
|
|
183
|
-
sfp = SequenceFormPolytope(ker, {j:
|
|
183
|
+
sfp = SequenceFormPolytope(ker, {j: A}, {j: None})
|
|
184
184
|
|
|
185
185
|
sfps.append(sfp)
|
|
186
186
|
|
|
@@ -198,11 +198,11 @@ def _bbg2efg(ker, game):
|
|
|
198
198
|
raw_payoffs = defaultdict(int)
|
|
199
199
|
|
|
200
200
|
def dfs(h, p, seqs, us):
|
|
201
|
-
|
|
201
|
+
A, h_primes = game.actions_and_children(h)
|
|
202
202
|
i = game.player(h)
|
|
203
203
|
us = us + game.utilities(h)
|
|
204
204
|
|
|
205
|
-
if not
|
|
205
|
+
if not A:
|
|
206
206
|
raw_payoffs[tuple(seqs)] += p * us
|
|
207
207
|
elif i is None:
|
|
208
208
|
p_primes = game.chance_probabilities(h)
|
|
@@ -214,7 +214,7 @@ def _bbg2efg(ker, game):
|
|
|
214
214
|
p_j = seqs[i]
|
|
215
215
|
p_js[i][j] = p_j
|
|
216
216
|
|
|
217
|
-
for a, h_prime in zip(
|
|
217
|
+
for a, h_prime in zip(A, h_primes):
|
|
218
218
|
next_seqs = seqs.copy()
|
|
219
219
|
next_seqs[i] = j, a
|
|
220
220
|
|
|
@@ -24,6 +24,10 @@ from noregret.regret_minimizers.sequence_form_polytopes import (
|
|
|
24
24
|
DiscountedCounterfactualRegretMinimization,
|
|
25
25
|
SequenceFormPolytopeRegretMinimizer,
|
|
26
26
|
)
|
|
27
|
+
from noregret.regret_minimizers.stochastic import (
|
|
28
|
+
MonteCarloCounterfactualRegretMinimization,
|
|
29
|
+
StochasticRegretMinimizer,
|
|
30
|
+
)
|
|
27
31
|
|
|
28
32
|
__all__ = (
|
|
29
33
|
'BlumMansour',
|
|
@@ -36,6 +40,7 @@ __all__ = (
|
|
|
36
40
|
'EuclideanRegularization',
|
|
37
41
|
'FollowTheRegularizedLeader',
|
|
38
42
|
'MirrorDescent',
|
|
43
|
+
'MonteCarloCounterfactualRegretMinimization',
|
|
39
44
|
'MultiplicativeWeightsUpdate',
|
|
40
45
|
'OnlineGradientDescent',
|
|
41
46
|
'ProbabilitySimplexRegretMinimizer',
|
|
@@ -44,5 +49,6 @@ __all__ = (
|
|
|
44
49
|
'RegretMatchingPlus',
|
|
45
50
|
'RegretMinimizer',
|
|
46
51
|
'SequenceFormPolytopeRegretMinimizer',
|
|
52
|
+
'StochasticRegretMinimizer',
|
|
47
53
|
'SwapRegretMinimizer',
|
|
48
54
|
)
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Module for regret minimizers operating over sequence-form polytopes."""
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from noregret.games.black_box import BlackBoxGame
|
|
8
|
+
from noregret.kernels import Kernel
|
|
9
|
+
from noregret.regret_minimizers.probability_simplices import (
|
|
10
|
+
ProbabilitySimplexRegretMinimizer,
|
|
11
|
+
RegretMatching,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class StochasticRegretMinimizer(ABC):
    """Abstract base class for stochastic regret minimizers.

    One probability simplex regret minimizer is kept per information
    set and is created the first time a node of that information set is
    looked up. :meth:`sample` gathers utilities for one player by
    walking the game, and :meth:`observe` feeds them to the
    per-information-set regret minimizers.
    """
    kernel: Kernel
    """Kernel."""
    game: BlackBoxGame
    """Game."""
    regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer]
    """Regret minimizer type."""
    reference_strategy_profile: Callable[[Any], Any] | None = None
    """Reference strategy profile."""
    sample_count: int = field(default=0, init=False)
    """number of samples."""
    next_sample_count: int = field(default=0, init=False)
    """Next number of samples."""
    node_visit_count: int = field(default=0, init=False)
    """Number of node visits."""
    next_node_visit_count: int = field(default=0, init=False)
    """Next number of node visits."""
    # Regret minimizers created so far, keyed by information set.
    regret_minimizers: dict[str, ProbabilitySimplexRegretMinimizer] = field(
        default_factory=dict,
        init=False,
    )

    def regret_minimizer(self, node):
        """Return the regret minimizer for the information set of a node.

        The regret minimizer is created on first use, with one
        dimension per action available at the node.

        :param node: Node.
        :return: Regret minimizer.
        """
        j = self.game.information_set(node)

        if j not in self.regret_minimizers:
            self.regret_minimizers[j] = self.regret_minimizer_type(
                self.kernel,
                len(self.game.actions(node)),
            )

        return self.regret_minimizers[j]

    def average_action_probabilities(self, node):
        """Return the average action probabilities given a node.

        :param node: Node.
        :return: Average action probabilities.
        """
        np = self.kernel.numpy
        dtype = self.kernel.data_type
        R = self.regret_minimizer(node)
        ps = R.average_strategy

        # A scalar average strategy is replaced by the uniform one.
        # NOTE(review): presumably the scalar is the regret minimizer's
        # placeholder before anything was observed -- confirm.
        if np.isscalar(ps):
            ps = np.full(R.dimension, 1 / R.dimension, dtype)

        return ps

    def _action_probabilities(self, h):
        """Return the current action probabilities at node ``h``.

        Reuses the regret minimizer's pending ``next_strategy`` and
        only asks for a new output when there is none.
        """
        R = self.regret_minimizer(h)
        ps = R.next_strategy

        if ps is None:
            ps = R.output()

        return ps

    def _external_sampling(self, i, us, h):
        """Walk the subtree of ``h``, branching only at player ``i``.

        :param i: Player whose utilities are gathered.
        :param us: Dictionary to fill, keyed by information set, with
                   one utility per action.
        :param h: Node.
        :return: Utility of player ``i`` accumulated from ``h`` on.
        """
        np = self.kernel.numpy
        dtype = self.kernel.data_type
        # Only committed to ``node_visit_count`` by ``observe``.
        self.next_node_visit_count += 1
        u = self.game.utility(h, i)
        A = self.game.actions(h)

        if A:
            i_prime = self.game.player(h)

            if i_prime is None:
                ps = self.game.chance_probabilities(h)
            else:
                ps = self._action_probabilities(h)

            if i_prime == i:
                # Player i's own node: every action is explored.
                u_primes = []

                for a in A:
                    h_prime = self.game.apply(h, a)

                    u_primes.append(self._external_sampling(i, us, h_prime))

                j = self.game.information_set(h)
                us[j] = np.array(u_primes, dtype)
                # Expected utility under the current probabilities.
                u += us[j] @ ps
            else:
                # Chance or another player: a single action is sampled.
                a = np.random.choice(A, p=ps).item()
                h_prime = self.game.apply(h, a)
                u += self._external_sampling(i, us, h_prime)

        return u

    def _external_sampling2(self, player):
        """Run external sampling from the root and return the utilities."""
        us = {}

        self._external_sampling(player, us, self.game.root_node)

        return us

    def _outcome_sampling(self, i, us, h, p):
        """Walk a single sampled trajectory starting at ``h``.

        :param i: Player whose utilities are gathered.
        :param us: Dictionary to fill, keyed by information set, with
                   one utility per action.
        :param h: Node.
        :param p: Product of the probabilities of the actions sampled
                  on the way to ``h`` (``1`` at the root).
        :return: Utility estimate of player ``i`` from ``h`` on.
        """
        np = self.kernel.numpy
        dtype = self.kernel.data_type
        # Only committed to ``node_visit_count`` by ``observe``.
        self.next_node_visit_count += 1
        # NOTE(review): dividing by p looks like importance weighting
        # of the sampled trajectory -- confirm.
        u = self.game.utility(h, i) / p
        A = self.game.actions(h)

        if A:
            i_prime = self.game.player(h)

            if i_prime is None:
                ps = self.game.chance_probabilities(h)
            elif i_prime == i:
                # Player i's own actions come from the reference profile.
                ps = self.reference_strategy_profile(h)
            else:
                ps = self._action_probabilities(h)

            # Sample an action index so that ps[k] can be reused below.
            k = np.random.choice(len(A), p=ps)
            a = A[k]
            h_prime = self.game.apply(h, a)
            p_prime = ps[k] * p
            u_prime = ps[k] * self._outcome_sampling(i, us, h_prime, p_prime)
            u += u_prime

            if i_prime == i:
                # Called for its side effect only: the regret minimizer
                # must exist before ``observe`` looks it up by key.
                self.regret_minimizer(h)

                j = self.game.information_set(h)
                # Every action but the sampled one gets zero utility.
                us[j] = np.zeros(len(A), dtype)
                us[j][k] = u_prime

        return u

    def _outcome_sampling2(self, player):
        """Run outcome sampling from the root and return the utilities."""
        us = {}

        self._outcome_sampling(player, us, self.game.root_node, 1)

        return us

    def sample(self, player):
        """Sample utilities for a player.

        External sampling is used when no reference strategy profile is
        set and outcome sampling otherwise.

        :param player: Player.
        :return: Utilities, as a dictionary keyed by information set.
        """
        self.next_sample_count += 1

        if self.reference_strategy_profile is None:
            us = self._external_sampling2(player)
        else:
            us = self._outcome_sampling2(player)

        return us

    def observe(self, utilities):
        """Observe utilities.

        Also commits the pending sample and node visit counts.

        :param utilities: Utilities, as a dictionary keyed by
                          information set.
        :return: ``None``.
        """
        self.sample_count = self.next_sample_count
        self.node_visit_count = self.next_node_visit_count

        for j, u in utilities.items():
            R = self.regret_minimizers[j]

            # NOTE(review): presumably the regret minimizer must have
            # produced an output before it can observe -- confirm.
            if R.next_strategy is None:
                R.output()

            R.observe(u)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
@dataclass
class MonteCarloCounterfactualRegretMinimization(
        StochasticRegretMinimizer,
        ABC,
):
    """Class for Monte Carlo counterfactual regret minimization (MCCFR).

    Adds no behavior of its own: it only gives the regret minimizer
    type a default of regret matching.
    """
    # Redeclared from the base class solely to supply a default value.
    regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer] = (
        RegretMatching
    )
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
from noregret.solvers.linear_programming import linear_programming
|
|
3
3
|
from noregret.solvers.regret_minimization import (
|
|
4
4
|
regret_minimization,
|
|
5
|
+
stochastic_regret_minimization,
|
|
5
6
|
symmetric_regret_minimization,
|
|
6
7
|
)
|
|
7
8
|
|
|
8
9
|
__all__ = (
|
|
9
10
|
'linear_programming',
|
|
10
11
|
'regret_minimization',
|
|
12
|
+
'stochastic_regret_minimization',
|
|
11
13
|
'symmetric_regret_minimization',
|
|
12
14
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Module or regret minimization."""
|
|
2
2
|
from collections.abc import Iterable, Mapping
|
|
3
|
-
from itertools import count
|
|
3
|
+
from itertools import count, repeat
|
|
4
4
|
|
|
5
5
|
from tqdm import tqdm
|
|
6
6
|
|
|
@@ -29,10 +29,6 @@ def regret_minimization(
|
|
|
29
29
|
:param progress_bar: Whether to show a progress bar.
|
|
30
30
|
:return: Average strategy profile.
|
|
31
31
|
"""
|
|
32
|
-
np = game.kernel.numpy
|
|
33
|
-
|
|
34
|
-
if len(regret_minimizers) != game.player_count:
|
|
35
|
-
raise ValueError('inconsistent number of regret minimizers')
|
|
36
32
|
|
|
37
33
|
def average_strategy_profile():
|
|
38
34
|
average_strategy_profile = []
|
|
@@ -45,6 +41,11 @@ def regret_minimization(
|
|
|
45
41
|
def exploitability():
|
|
46
42
|
return game.exploitability(*average_strategy_profile())
|
|
47
43
|
|
|
44
|
+
np = game.kernel.numpy
|
|
45
|
+
|
|
46
|
+
if len(regret_minimizers) != game.player_count:
|
|
47
|
+
raise ValueError('inconsistent number of regret minimizers')
|
|
48
|
+
|
|
48
49
|
if iteration_count is None or np.isposinf(iteration_count):
|
|
49
50
|
iterations = count()
|
|
50
51
|
else:
|
|
@@ -57,24 +58,24 @@ def regret_minimization(
|
|
|
57
58
|
elif isinstance(progress_bar, Iterable):
|
|
58
59
|
iterations = tqdm(iterations, *progress_bar)
|
|
59
60
|
|
|
60
|
-
|
|
61
|
+
sigma = []
|
|
61
62
|
|
|
62
63
|
for R in regret_minimizers:
|
|
63
|
-
|
|
64
|
+
sigma.append(R.output(prediction))
|
|
64
65
|
|
|
65
66
|
for t in iterations:
|
|
66
67
|
if alternation:
|
|
67
68
|
for i, R in enumerate(regret_minimizers):
|
|
68
|
-
R.observe(game.utility(i, *
|
|
69
|
+
R.observe(game.utility(i, *sigma[:i], *sigma[i + 1:]))
|
|
69
70
|
|
|
70
|
-
|
|
71
|
+
sigma[i] = R.output(prediction)
|
|
71
72
|
else:
|
|
72
|
-
|
|
73
|
+
us = game.utilities(*sigma)
|
|
73
74
|
|
|
74
|
-
for i, (R, u) in enumerate(zip(regret_minimizers,
|
|
75
|
+
for i, (R, u) in enumerate(zip(regret_minimizers, us)):
|
|
75
76
|
R.observe(u)
|
|
76
77
|
|
|
77
|
-
|
|
78
|
+
sigma[i] = R.output(prediction)
|
|
78
79
|
|
|
79
80
|
if not checkpoints or t in checkpoints:
|
|
80
81
|
if update is not None:
|
|
@@ -116,19 +117,18 @@ def symmetric_regret_minimization(
|
|
|
116
117
|
:param progress_bar: Whether to show a progress bar.
|
|
117
118
|
:return: Average strategy profile.
|
|
118
119
|
"""
|
|
119
|
-
np = game.kernel.numpy
|
|
120
|
-
|
|
121
|
-
if not game.is_symmetric:
|
|
122
|
-
raise ValueError('game is asymmetric')
|
|
123
|
-
|
|
124
|
-
R = regret_minimizer
|
|
125
120
|
|
|
126
121
|
def average_strategy_profile():
|
|
127
|
-
return [
|
|
122
|
+
return [regret_minimizer.average_strategy] * game.player_count
|
|
128
123
|
|
|
129
124
|
def exploitability():
|
|
130
125
|
return game.exploitability(*average_strategy_profile())
|
|
131
126
|
|
|
127
|
+
np = game.kernel.numpy
|
|
128
|
+
|
|
129
|
+
if not game.is_symmetric:
|
|
130
|
+
raise ValueError('game is asymmetric')
|
|
131
|
+
|
|
132
132
|
if iteration_count is None or np.isposinf(iteration_count):
|
|
133
133
|
iterations = count()
|
|
134
134
|
else:
|
|
@@ -141,12 +141,14 @@ def symmetric_regret_minimization(
|
|
|
141
141
|
elif isinstance(progress_bar, Iterable):
|
|
142
142
|
iterations = tqdm(iterations, *progress_bar)
|
|
143
143
|
|
|
144
|
-
|
|
144
|
+
sigma_1 = regret_minimizer.output(prediction)
|
|
145
145
|
|
|
146
146
|
for t in iterations:
|
|
147
|
-
|
|
147
|
+
u = game.utility(0, *repeat(sigma_1, game.player_count - 1))
|
|
148
148
|
|
|
149
|
-
|
|
149
|
+
regret_minimizer.observe(u)
|
|
150
|
+
|
|
151
|
+
sigma_1 = regret_minimizer.output(prediction)
|
|
150
152
|
|
|
151
153
|
if not checkpoints or t in checkpoints:
|
|
152
154
|
if update is not None:
|
|
@@ -164,3 +166,62 @@ def symmetric_regret_minimization(
|
|
|
164
166
|
break
|
|
165
167
|
|
|
166
168
|
return average_strategy_profile()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def stochastic_regret_minimization(
        game,
        regret_minimizer,
        alternation=False,
        sample_count=1000000,
        checkpoints=(),
        update=None,
        progress_bar=True,
):
    """Solve a game using stochastic regret minimization.

    :param game: Game.
    :param regret_minimizer: Stochastic regret minimizer.
    :param alternation: Whether to alternate, defaults to ``False``.
                        When alternating, each player's sample is
                        observed before the next player is sampled;
                        otherwise all players are sampled first and
                        the samples are observed afterwards.
    :param sample_count: Number of samples, defaults to ``1000000``.
                         ``None`` or positive infinity runs until
                         ``update`` asks to stop.
    :param checkpoints: Sample indices after which ``update`` is
                        called. If empty, it is called after every
                        sample.
    :param update: Optional callable taking no arguments. A truthy
                   return value stops the solve.
    :param progress_bar: Whether to show a progress bar. A mapping is
                         passed to ``tqdm`` as keyword arguments and
                         any other iterable as positional arguments.
    :return: The regret minimizer's ``average_action_probabilities``
             method, which maps a node to its average action
             probabilities.
    """
    np = game.kernel.numpy

    if sample_count is None or np.isposinf(sample_count):
        samples = count()
    else:
        samples = range(sample_count)

    if progress_bar is True:
        samples = tqdm(samples)
    elif isinstance(progress_bar, Mapping):
        samples = tqdm(samples, **progress_bar)
    elif isinstance(progress_bar, Iterable):
        samples = tqdm(samples, *progress_bar)

    for s in samples:
        players = range(game.player_count)

        if alternation:
            for i in players:
                regret_minimizer.observe(regret_minimizer.sample(i))
        else:
            # Sample every player before anything is observed.
            uss = [regret_minimizer.sample(i) for i in players]

            for us in uss:
                regret_minimizer.observe(us)

        if not checkpoints or s in checkpoints:
            if update is not None and update():
                break

    return regret_minimizer.average_action_probabilities
|
|
@@ -6,6 +6,7 @@ import noregret as nr
|
|
|
6
6
|
|
|
7
7
|
class GameTestCaseMixin(ABC):
|
|
8
8
|
KER = None
|
|
9
|
+
GAMES = None
|
|
9
10
|
|
|
10
11
|
@abstractmethod
|
|
11
12
|
def uniform_strategy_profile(self, game):
|
|
@@ -145,12 +146,43 @@ class ExtensiveFormGameTestCase(GameTestCaseMixin, TestCase):
|
|
|
145
146
|
self.assertEqual(sfp.parent_sequences, sfp2.parent_sequences)
|
|
146
147
|
|
|
147
148
|
|
|
149
|
+
class SimulationTestCase(TestCase):
    """Test case for simulations."""
    KER = nr.FPKer()

    def test_sequences(self):
        """Check the sequences with and without a player filter."""
        np = self.KER.numpy
        players = 0, None, 0, 1
        decision_points = '', None, 'ab', 'b'
        actions = 'a', 'b', 'c', 'd'
        utilities = np.array([1, -1], self.KER.data_type)
        sim = nr.Sim(self.KER, players, decision_points, actions, utilities)

        # The step whose player is None contributes no sequence.
        first = '', 'a'
        second = 'ab', 'c'
        third = 'b', 'd'

        self.assertEqual(tuple(sim.sequences()), (first, second, third))
        self.assertEqual(tuple(sim.sequences(0)), (first, second))
        self.assertEqual(tuple(sim.sequences(1)), (third,))

    def test_utility(self):
        """Check that utilities are looked up by player."""
        np = self.KER.numpy
        utilities = np.array([1, -1], self.KER.data_type)
        sim = nr.Sim(self.KER, (), (), (), utilities)

        self.assertEqual(sim.utility(0), 1)
        self.assertEqual(sim.utility(1), -1)
|
|
177
|
+
|
|
178
|
+
|
|
148
179
|
class BlackBoxGameTestCase(TestCase):
|
|
149
180
|
KER = nr.FPKer()
|
|
150
181
|
GAMES = (
|
|
151
182
|
nr.open_spiel_game(KER, 'kuhn_poker'),
|
|
152
183
|
nr.open_spiel_game(KER, 'leduc_poker'),
|
|
153
184
|
)
|
|
185
|
+
SEED = 42
|
|
154
186
|
|
|
155
187
|
def test_actions_and_children(self):
|
|
156
188
|
for game in self.GAMES:
|
|
@@ -197,6 +229,29 @@ class BlackBoxGameTestCase(TestCase):
|
|
|
197
229
|
|
|
198
230
|
np.testing.assert_equal(ps, ps2)
|
|
199
231
|
|
|
232
|
+
def test_exploitability(self):
|
|
233
|
+
for game in self.GAMES:
|
|
234
|
+
sigma = nr.UniformStrategyProfile(self.KER, game)
|
|
235
|
+
epsilon = game.exploitability(sigma)
|
|
236
|
+
|
|
237
|
+
game = nr.to_efg(self.KER, game)
|
|
238
|
+
sfps = game.sequence_form_polytopes
|
|
239
|
+
bs = [sfp.behavioral_form_uniform_strategy for sfp in sfps]
|
|
240
|
+
sigma = [sfp.to_sequence_form(b) for sfp, b in zip(sfps, bs)]
|
|
241
|
+
epsilon2 = game.exploitability(*sigma)
|
|
242
|
+
|
|
243
|
+
self.assertAlmostEqual(epsilon, epsilon2)
|
|
244
|
+
|
|
245
|
+
def test_simulation(self):
|
|
246
|
+
np = self.KER.numpy
|
|
247
|
+
|
|
248
|
+
for game in self.GAMES:
|
|
249
|
+
np.random.seed(self.SEED)
|
|
250
|
+
|
|
251
|
+
sigma = nr.UniformStrategyProfile(self.KER, game)
|
|
252
|
+
|
|
253
|
+
game.simulate(sigma)
|
|
254
|
+
|
|
200
255
|
|
|
201
256
|
if __name__ == '__main__':
|
|
202
257
|
main() # pragma: no cover
|
|
@@ -23,12 +23,12 @@ class LinearProgrammingTestCase(TestCase):
|
|
|
23
23
|
|
|
24
24
|
for game, value in self.GAME_VALUES:
|
|
25
25
|
x, y = nr.lp(game)
|
|
26
|
-
|
|
26
|
+
epsilon = game.exploitability(x, y)
|
|
27
27
|
v = game.expected_row_utility(x, y)
|
|
28
28
|
|
|
29
|
-
self.assertAlmostEqual(
|
|
29
|
+
self.assertAlmostEqual(epsilon, 0)
|
|
30
30
|
self.assertAlmostEqual(v, value)
|
|
31
|
-
self.assertEqual(
|
|
31
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
32
32
|
self.assertEqual(v.dtype, dtype)
|
|
33
33
|
|
|
34
34
|
|
|
@@ -44,12 +44,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
|
|
|
44
44
|
target_exploitability=self.TARGET_EXPLOITABILITY,
|
|
45
45
|
progress_bar=False,
|
|
46
46
|
)
|
|
47
|
-
|
|
47
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
48
48
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
49
49
|
|
|
50
|
-
self.assertLess(
|
|
50
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
51
51
|
self.assertAlmostEqual(v, value, delta=self.DELTA)
|
|
52
|
-
self.assertEqual(
|
|
52
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
53
53
|
self.assertEqual(v.dtype, dtype)
|
|
54
54
|
|
|
55
55
|
def test_last_iterate_convergence(self):
|
|
@@ -70,12 +70,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
|
|
|
70
70
|
target_exploitability=self.TARGET_EXPLOITABILITY,
|
|
71
71
|
progress_bar=False,
|
|
72
72
|
)
|
|
73
|
-
|
|
73
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
74
74
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
75
75
|
|
|
76
|
-
self.assertLess(
|
|
76
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
77
77
|
self.assertAlmostEqual(v, value, delta=self.DELTA)
|
|
78
|
-
self.assertEqual(
|
|
78
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
79
79
|
self.assertEqual(v.dtype, dtype)
|
|
80
80
|
|
|
81
81
|
def test_frequent_iterate_convergence(self):
|
|
@@ -94,12 +94,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
|
|
|
94
94
|
target_exploitability=self.TARGET_EXPLOITABILITY,
|
|
95
95
|
progress_bar=False,
|
|
96
96
|
)
|
|
97
|
-
|
|
97
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
98
98
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
99
99
|
|
|
100
|
-
self.assertLess(
|
|
100
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
101
101
|
self.assertAlmostEqual(v, value, delta=self.DELTA)
|
|
102
|
-
self.assertEqual(
|
|
102
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
103
103
|
self.assertEqual(v.dtype, dtype)
|
|
104
104
|
|
|
105
105
|
|
|
@@ -141,12 +141,12 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
|
|
|
141
141
|
target_exploitability=self.TARGET_EXPLOITABILITY,
|
|
142
142
|
progress_bar=False,
|
|
143
143
|
)
|
|
144
|
-
|
|
144
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
145
145
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
146
146
|
|
|
147
|
-
self.assertLess(
|
|
147
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
148
148
|
self.assertAlmostEqual(v, value, delta=self.DELTA)
|
|
149
|
-
self.assertEqual(
|
|
149
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
150
150
|
self.assertEqual(v.dtype, dtype)
|
|
151
151
|
|
|
152
152
|
|
|
@@ -172,7 +172,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
|
172
172
|
nr.CFR(self.KER, game.column_sequence_form_polytope),
|
|
173
173
|
progress_bar=False,
|
|
174
174
|
)
|
|
175
|
-
|
|
175
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
176
176
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
177
177
|
x_bar2, y_bar2 = nr.rm(
|
|
178
178
|
game,
|
|
@@ -180,10 +180,10 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
|
180
180
|
nr.CFR2(self.KER, game.column_sequence_form_polytope),
|
|
181
181
|
progress_bar=False,
|
|
182
182
|
)
|
|
183
|
-
|
|
183
|
+
epsilon2 = game.exploitability(x_bar2, y_bar2)
|
|
184
184
|
v2 = game.expected_row_utility(x_bar2, y_bar2)
|
|
185
185
|
|
|
186
|
-
self.assertAlmostEqual(
|
|
186
|
+
self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
|
|
187
187
|
self.assertAlmostEqual(v, v2, self.PLACES)
|
|
188
188
|
|
|
189
189
|
x_bar, y_bar = nr.rm(
|
|
@@ -193,7 +193,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
|
193
193
|
prediction=True,
|
|
194
194
|
progress_bar=False,
|
|
195
195
|
)
|
|
196
|
-
|
|
196
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
197
197
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
198
198
|
x_bar2, y_bar2 = nr.rm(
|
|
199
199
|
game,
|
|
@@ -202,12 +202,65 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
|
202
202
|
prediction=True,
|
|
203
203
|
progress_bar=False,
|
|
204
204
|
)
|
|
205
|
-
|
|
205
|
+
epsilon2 = game.exploitability(x_bar2, y_bar2)
|
|
206
206
|
v2 = game.expected_row_utility(x_bar2, y_bar2)
|
|
207
207
|
|
|
208
|
-
self.assertAlmostEqual(
|
|
208
|
+
self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
|
|
209
209
|
self.assertAlmostEqual(v, v2, self.PLACES)
|
|
210
210
|
|
|
211
211
|
|
|
212
|
+
class StochasticRegretMinimizationTestCase(TestCase):
|
|
213
|
+
KER = nr.FPKer()
|
|
214
|
+
GAME = nr.open_spiel_game(KER, 'kuhn_poker')
|
|
215
|
+
SAMPLE_COUNT = 100000
|
|
216
|
+
TARGET_EXPLOITABILITY = 1e-1
|
|
217
|
+
SEED = 42
|
|
218
|
+
|
|
219
|
+
def test_external_sampling(self):
|
|
220
|
+
np = self.KER.numpy
|
|
221
|
+
|
|
222
|
+
assert self.GAME.is_two_player and self.GAME.is_zero_sum
|
|
223
|
+
|
|
224
|
+
np.random.seed(self.SEED)
|
|
225
|
+
|
|
226
|
+
R = nr.MCCFR(self.KER, self.GAME)
|
|
227
|
+
sigma = nr.stochastic_rm(
|
|
228
|
+
self.GAME,
|
|
229
|
+
R,
|
|
230
|
+
alternation=True,
|
|
231
|
+
sample_count=self.SAMPLE_COUNT,
|
|
232
|
+
progress_bar=False,
|
|
233
|
+
)
|
|
234
|
+
epsilon = self.GAME.exploitability(sigma)
|
|
235
|
+
|
|
236
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
237
|
+
|
|
238
|
+
def test_outcome_sampling(self):
|
|
239
|
+
np = self.KER.numpy
|
|
240
|
+
|
|
241
|
+
assert self.GAME.is_two_player and self.GAME.is_zero_sum
|
|
242
|
+
|
|
243
|
+
np.random.seed(self.SEED)
|
|
244
|
+
|
|
245
|
+
R = nr.MCCFR(
|
|
246
|
+
self.KER,
|
|
247
|
+
self.GAME,
|
|
248
|
+
reference_strategy_profile=nr.UniformStrategyProfile(
|
|
249
|
+
self.KER,
|
|
250
|
+
self.GAME,
|
|
251
|
+
),
|
|
252
|
+
)
|
|
253
|
+
sigma = nr.stochastic_rm(
|
|
254
|
+
self.GAME,
|
|
255
|
+
R,
|
|
256
|
+
alternation=True,
|
|
257
|
+
sample_count=self.SAMPLE_COUNT,
|
|
258
|
+
progress_bar=False,
|
|
259
|
+
)
|
|
260
|
+
epsilon = self.GAME.exploitability(sigma)
|
|
261
|
+
|
|
262
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
263
|
+
|
|
264
|
+
|
|
212
265
|
if __name__ == '__main__':
|
|
213
266
|
main() # pragma: no cover
|
|
@@ -31,6 +31,7 @@ noregret/regret_minimizers/__init__.py
|
|
|
31
31
|
noregret/regret_minimizers/probability_simplices.py
|
|
32
32
|
noregret/regret_minimizers/regret_minimizers.py
|
|
33
33
|
noregret/regret_minimizers/sequence_form_polytopes.py
|
|
34
|
+
noregret/regret_minimizers/stochastic.py
|
|
34
35
|
noregret/solvers/__init__.py
|
|
35
36
|
noregret/solvers/linear_programming.py
|
|
36
37
|
noregret/solvers/regret_minimization.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/battle-of-the-sexes.json
RENAMED
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/gift-exchange-game.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-scissors-plus.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-scissors.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-superscissors.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/probability_simplices.py
RENAMED
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/regret_minimizers.py
RENAMED
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/sequence_form_polytopes.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|