noregret 0.0.0.dev8__tar.gz → 0.0.0.dev9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/PKG-INFO +1 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/__init__.py +20 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/__init__.py +8 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/black_box.py +58 -9
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/extensive_form.py +5 -5
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/__init__.py +6 -0
- noregret-0.0.0.dev9/noregret/regret_minimizers/stochastic.py +202 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/solvers/__init__.py +2 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/solvers/regret_minimization.py +83 -22
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_games.py +13 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_linear_programming.py +3 -3
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_regret_minimization.py +68 -18
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/utilities.py +14 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/PKG-INFO +1 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/SOURCES.txt +1 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/setup.py +1 -1
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/LICENSE +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/README.rst +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/assurance-game.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/battle-of-the-sexes.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/chicken.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/gift-exchange-game.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/matching-pennies.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/prisoners-dilemma.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/pure-coordination.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-scissors-plus.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-scissors.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-superscissors.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/stag-hunt.json +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/games.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/multilinear.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/normal_form.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/kernels.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/probability_simplices.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/regret_minimizers.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/solvers/linear_programming.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/__init__.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_sequence_form_polytopes.py +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/requires.txt +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/setup.cfg +0 -0
|
@@ -18,6 +18,7 @@ from noregret.games import (
|
|
|
18
18
|
RockPaperScissorsPlus,
|
|
19
19
|
RockPaperSuperscissors,
|
|
20
20
|
StagHunt,
|
|
21
|
+
StrategyProfile,
|
|
21
22
|
to_extensive_form_game,
|
|
22
23
|
TwoPlayerExtensiveFormGame,
|
|
23
24
|
TwoPlayerGame,
|
|
@@ -27,6 +28,7 @@ from noregret.games import (
|
|
|
27
28
|
TwoPlayerZeroSumGame,
|
|
28
29
|
TwoPlayerZeroSumMultilinearGame,
|
|
29
30
|
TwoPlayerZeroSumNormalFormGame,
|
|
31
|
+
UniformStrategyProfile,
|
|
30
32
|
)
|
|
31
33
|
from noregret.kernels import (
|
|
32
34
|
CUDAKernel,
|
|
@@ -46,6 +48,7 @@ from noregret.regret_minimizers import (
|
|
|
46
48
|
EuclideanRegularization,
|
|
47
49
|
FollowTheRegularizedLeader,
|
|
48
50
|
MirrorDescent,
|
|
51
|
+
MonteCarloCounterfactualRegretMinimization,
|
|
49
52
|
MultiplicativeWeightsUpdate,
|
|
50
53
|
OnlineGradientDescent,
|
|
51
54
|
ProbabilitySimplexRegretMinimizer,
|
|
@@ -54,15 +57,17 @@ from noregret.regret_minimizers import (
|
|
|
54
57
|
RegretMatchingPlus,
|
|
55
58
|
RegretMinimizer,
|
|
56
59
|
SequenceFormPolytopeRegretMinimizer,
|
|
60
|
+
StochasticRegretMinimizer,
|
|
57
61
|
SwapRegretMinimizer,
|
|
58
62
|
)
|
|
59
63
|
from noregret.sequence_form_polytopes import SequenceFormPolytope
|
|
60
64
|
from noregret.solvers import (
|
|
61
65
|
linear_programming,
|
|
62
66
|
regret_minimization,
|
|
67
|
+
stochastic_regret_minimization,
|
|
63
68
|
symmetric_regret_minimization,
|
|
64
69
|
)
|
|
65
|
-
from noregret.utilities import import_object, tuple_or_none
|
|
70
|
+
from noregret.utilities import import_object, sample, tuple_or_none
|
|
66
71
|
|
|
67
72
|
BM = BlumMansour
|
|
68
73
|
"""Alias for :class:`noregret.BlumMansour`."""
|
|
@@ -94,6 +99,10 @@ FTRL = FollowTheRegularizedLeader
|
|
|
94
99
|
"""Alias for :class:`noregret.FollowTheRegularizedLeader`."""
|
|
95
100
|
lp = linear_programming
|
|
96
101
|
"""Alias for :func:`noregret.linear_programming`."""
|
|
102
|
+
MCCFR = MonteCarloCounterfactualRegretMinimization
|
|
103
|
+
"""Alias for
|
|
104
|
+
:class:`noregret.MonteCarloCounterfactualRegretMinimization`.
|
|
105
|
+
"""
|
|
97
106
|
MD = MirrorDescent
|
|
98
107
|
"""Alias for :class:`noregret.MirrorDescent`."""
|
|
99
108
|
MWU = MultiplicativeWeightsUpdate
|
|
@@ -112,6 +121,8 @@ RM = RegretMatching
|
|
|
112
121
|
"""Alias for :class:`noregret.RegretMatching`."""
|
|
113
122
|
rm = regret_minimization
|
|
114
123
|
"""Alias for :func:`noregret.regret_minimization`."""
|
|
124
|
+
stochastic_rm = stochastic_regret_minimization
|
|
125
|
+
"""Alias for :func:`noregret.stochastic_regret_minimization`."""
|
|
115
126
|
symmetric_rm = symmetric_regret_minimization
|
|
116
127
|
"""Alias for :func:`noregret.symmetric_regret_minimization`."""
|
|
117
128
|
to_efg = to_extensive_form_game
|
|
@@ -155,8 +166,10 @@ __all__ = (
|
|
|
155
166
|
'lp',
|
|
156
167
|
'MatchingPennies',
|
|
157
168
|
'matrix_game',
|
|
169
|
+
'MCCFR',
|
|
158
170
|
'MD',
|
|
159
171
|
'MirrorDescent',
|
|
172
|
+
'MonteCarloCounterfactualRegretMinimization',
|
|
160
173
|
'MultilinearGame',
|
|
161
174
|
'MultiplicativeWeightsUpdate',
|
|
162
175
|
'MWU',
|
|
@@ -181,10 +194,15 @@ __all__ = (
|
|
|
181
194
|
'RockPaperScissors',
|
|
182
195
|
'RockPaperScissorsPlus',
|
|
183
196
|
'RockPaperSuperscissors',
|
|
197
|
+
'sample',
|
|
184
198
|
'SequenceFormPolytope',
|
|
185
199
|
'SequenceFormPolytopeRegretMinimizer',
|
|
186
200
|
'Serializable',
|
|
187
201
|
'StagHunt',
|
|
202
|
+
'stochastic_regret_minimization',
|
|
203
|
+
'StochasticRegretMinimizer',
|
|
204
|
+
'stochastic_rm',
|
|
205
|
+
'StrategyProfile',
|
|
188
206
|
'SwapRegretMinimizer',
|
|
189
207
|
'symmetric_regret_minimization',
|
|
190
208
|
'symmetric_rm',
|
|
@@ -199,4 +217,5 @@ __all__ = (
|
|
|
199
217
|
'TwoPlayerZeroSumGame',
|
|
200
218
|
'TwoPlayerZeroSumMultilinearGame',
|
|
201
219
|
'TwoPlayerZeroSumNormalFormGame',
|
|
220
|
+
'UniformStrategyProfile',
|
|
202
221
|
)
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
"""Module for games."""
|
|
2
|
-
from noregret.games.black_box import
|
|
2
|
+
from noregret.games.black_box import (
|
|
3
|
+
BlackBoxGame,
|
|
4
|
+
open_spiel_game,
|
|
5
|
+
StrategyProfile,
|
|
6
|
+
UniformStrategyProfile,
|
|
7
|
+
)
|
|
3
8
|
from noregret.games.extensive_form import (
|
|
4
9
|
ExtensiveFormGame,
|
|
5
10
|
to_extensive_form_game,
|
|
@@ -49,6 +54,7 @@ __all__ = (
|
|
|
49
54
|
'RockPaperScissorsPlus',
|
|
50
55
|
'RockPaperSuperscissors',
|
|
51
56
|
'StagHunt',
|
|
57
|
+
'StrategyProfile',
|
|
52
58
|
'to_extensive_form_game',
|
|
53
59
|
'TwoPlayerExtensiveFormGame',
|
|
54
60
|
'TwoPlayerGame',
|
|
@@ -58,4 +64,5 @@ __all__ = (
|
|
|
58
64
|
'TwoPlayerZeroSumGame',
|
|
59
65
|
'TwoPlayerZeroSumMultilinearGame',
|
|
60
66
|
'TwoPlayerZeroSumNormalFormGame',
|
|
67
|
+
'UniformStrategyProfile',
|
|
61
68
|
)
|
|
@@ -4,7 +4,7 @@ from dataclasses import dataclass, field
|
|
|
4
4
|
from functools import partial
|
|
5
5
|
|
|
6
6
|
from ordered_set import OrderedSet
|
|
7
|
-
from pyspiel import GameType, load_game
|
|
7
|
+
from pyspiel import exploitability, GameType, load_game
|
|
8
8
|
|
|
9
9
|
from noregret.kernels import Kernel
|
|
10
10
|
|
|
@@ -139,6 +139,12 @@ class BlackBoxGame(ABC):
|
|
|
139
139
|
|
|
140
140
|
return np.array(ps, dtype)
|
|
141
141
|
|
|
142
|
+
def exploitability(self, strategy_profile):
|
|
143
|
+
if not self.is_two_player or not self.is_zero_sum:
|
|
144
|
+
raise ValueError('not 2p0s')
|
|
145
|
+
|
|
146
|
+
raise NotImplementedError
|
|
147
|
+
|
|
142
148
|
|
|
143
149
|
@dataclass
|
|
144
150
|
class _OpenSpielBlackBoxGame(BlackBoxGame):
|
|
@@ -167,17 +173,14 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
|
|
|
167
173
|
return node.child(node.string_to_action(action))
|
|
168
174
|
|
|
169
175
|
def children(self, node):
|
|
170
|
-
return list(node.child
|
|
176
|
+
return list(map(node.child, node.legal_actions()))
|
|
171
177
|
|
|
172
178
|
def actions_and_children(self, node):
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
for a in node.legal_actions():
|
|
177
|
-
actions.append(node.action_to_string(a))
|
|
178
|
-
children.append(node.child(a))
|
|
179
|
+
A = node.legal_actions()
|
|
180
|
+
actions = OrderedSet(map(node.action_to_string, A))
|
|
181
|
+
children = list(map(node.child, A))
|
|
179
182
|
|
|
180
|
-
return
|
|
183
|
+
return actions, children
|
|
181
184
|
|
|
182
185
|
def player(self, node):
|
|
183
186
|
i = node.current_player()
|
|
@@ -212,6 +215,27 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
|
|
|
212
215
|
|
|
213
216
|
return np.array([p for _, p in node.chance_outcomes()], dtype)
|
|
214
217
|
|
|
218
|
+
def _sigma(self, strategy_profile, h, sigma):
|
|
219
|
+
A = h.legal_actions()
|
|
220
|
+
h_primes = list(map(h.child, A))
|
|
221
|
+
i = self.player(h)
|
|
222
|
+
|
|
223
|
+
if A and i is not None and (j := self.information_set(h)) not in sigma:
|
|
224
|
+
sigma[j] = list(zip(A, strategy_profile(h).tolist()))
|
|
225
|
+
|
|
226
|
+
for h_prime in h_primes:
|
|
227
|
+
self._sigma(strategy_profile, h_prime, sigma)
|
|
228
|
+
|
|
229
|
+
def _sigma2(self, strategy_profile):
|
|
230
|
+
sigma = {}
|
|
231
|
+
|
|
232
|
+
self._sigma(strategy_profile, self.root_node, sigma)
|
|
233
|
+
|
|
234
|
+
return sigma
|
|
235
|
+
|
|
236
|
+
def exploitability(self, strategy_profile):
|
|
237
|
+
return exploitability(self._game, self._sigma2(strategy_profile))
|
|
238
|
+
|
|
215
239
|
|
|
216
240
|
def open_spiel_game(kernel, game):
|
|
217
241
|
"""Load a game from OpenSpiel.
|
|
@@ -221,3 +245,28 @@ def open_spiel_game(kernel, game):
|
|
|
221
245
|
:return: Game.
|
|
222
246
|
"""
|
|
223
247
|
return _OpenSpielBlackBoxGame(kernel, game)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@dataclass
|
|
251
|
+
class StrategyProfile(ABC):
|
|
252
|
+
"""Abstract base class for strategy profiles."""
|
|
253
|
+
kernel: Kernel
|
|
254
|
+
"""Kernel."""
|
|
255
|
+
game: BlackBoxGame
|
|
256
|
+
"""Game."""
|
|
257
|
+
|
|
258
|
+
@abstractmethod
|
|
259
|
+
def __call__(self, node):
|
|
260
|
+
pass
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@dataclass
|
|
264
|
+
class UniformStrategyProfile(StrategyProfile):
|
|
265
|
+
"""Class for uniform strategy profiles."""
|
|
266
|
+
|
|
267
|
+
def __call__(self, node):
|
|
268
|
+
np = self.kernel.numpy
|
|
269
|
+
dtype = self.kernel.data_type
|
|
270
|
+
n = len(self.game.actions(node))
|
|
271
|
+
|
|
272
|
+
return np.full(n, 1 / n, dtype)
|
|
@@ -178,9 +178,9 @@ def _nfg2efg(ker, game, decision_points='p{}'.format):
|
|
|
178
178
|
payoffs = scipy.sparse.csr_array(payoffs)
|
|
179
179
|
sfps = []
|
|
180
180
|
|
|
181
|
-
for i,
|
|
181
|
+
for i, A in enumerate(game.actions):
|
|
182
182
|
j = decision_points(i)
|
|
183
|
-
sfp = SequenceFormPolytope(ker, {j:
|
|
183
|
+
sfp = SequenceFormPolytope(ker, {j: A}, {j: None})
|
|
184
184
|
|
|
185
185
|
sfps.append(sfp)
|
|
186
186
|
|
|
@@ -198,11 +198,11 @@ def _bbg2efg(ker, game):
|
|
|
198
198
|
raw_payoffs = defaultdict(int)
|
|
199
199
|
|
|
200
200
|
def dfs(h, p, seqs, us):
|
|
201
|
-
|
|
201
|
+
A, h_primes = game.actions_and_children(h)
|
|
202
202
|
i = game.player(h)
|
|
203
203
|
us = us + game.utilities(h)
|
|
204
204
|
|
|
205
|
-
if not
|
|
205
|
+
if not A:
|
|
206
206
|
raw_payoffs[tuple(seqs)] += p * us
|
|
207
207
|
elif i is None:
|
|
208
208
|
p_primes = game.chance_probabilities(h)
|
|
@@ -214,7 +214,7 @@ def _bbg2efg(ker, game):
|
|
|
214
214
|
p_j = seqs[i]
|
|
215
215
|
p_js[i][j] = p_j
|
|
216
216
|
|
|
217
|
-
for a, h_prime in zip(
|
|
217
|
+
for a, h_prime in zip(A, h_primes):
|
|
218
218
|
next_seqs = seqs.copy()
|
|
219
219
|
next_seqs[i] = j, a
|
|
220
220
|
|
|
@@ -24,6 +24,10 @@ from noregret.regret_minimizers.sequence_form_polytopes import (
|
|
|
24
24
|
DiscountedCounterfactualRegretMinimization,
|
|
25
25
|
SequenceFormPolytopeRegretMinimizer,
|
|
26
26
|
)
|
|
27
|
+
from noregret.regret_minimizers.stochastic import (
|
|
28
|
+
MonteCarloCounterfactualRegretMinimization,
|
|
29
|
+
StochasticRegretMinimizer,
|
|
30
|
+
)
|
|
27
31
|
|
|
28
32
|
__all__ = (
|
|
29
33
|
'BlumMansour',
|
|
@@ -36,6 +40,7 @@ __all__ = (
|
|
|
36
40
|
'EuclideanRegularization',
|
|
37
41
|
'FollowTheRegularizedLeader',
|
|
38
42
|
'MirrorDescent',
|
|
43
|
+
'MonteCarloCounterfactualRegretMinimization',
|
|
39
44
|
'MultiplicativeWeightsUpdate',
|
|
40
45
|
'OnlineGradientDescent',
|
|
41
46
|
'ProbabilitySimplexRegretMinimizer',
|
|
@@ -44,5 +49,6 @@ __all__ = (
|
|
|
44
49
|
'RegretMatchingPlus',
|
|
45
50
|
'RegretMinimizer',
|
|
46
51
|
'SequenceFormPolytopeRegretMinimizer',
|
|
52
|
+
'StochasticRegretMinimizer',
|
|
47
53
|
'SwapRegretMinimizer',
|
|
48
54
|
)
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Module for stochastic regret minimizers operating over black-box games."""
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from noregret.games.black_box import BlackBoxGame
|
|
8
|
+
from noregret.kernels import Kernel
|
|
9
|
+
from noregret.regret_minimizers.probability_simplices import (
|
|
10
|
+
ProbabilitySimplexRegretMinimizer,
|
|
11
|
+
RegretMatching,
|
|
12
|
+
)
|
|
13
|
+
from noregret.utilities import sample
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
|
|
17
|
+
class StochasticRegretMinimizer(ABC):
|
|
18
|
+
"""Abstract base class for stochastic regret minimizers."""
|
|
19
|
+
kernel: Kernel
|
|
20
|
+
"""Kernel."""
|
|
21
|
+
game: BlackBoxGame
|
|
22
|
+
"""Game."""
|
|
23
|
+
regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer]
|
|
24
|
+
"""Regret minimizer type."""
|
|
25
|
+
reference_strategy_profile: Callable[[Any], Any] | None = None
|
|
26
|
+
"""Reference strategy profile."""
|
|
27
|
+
sample_count: int = field(default=0, init=False)
|
|
28
|
+
"""Number of samples."""
|
|
29
|
+
next_sample_count: int = field(default=0, init=False)
|
|
30
|
+
"""Next number of samples."""
|
|
31
|
+
node_visit_count: int = field(default=0, init=False)
|
|
32
|
+
"""Number of node visits."""
|
|
33
|
+
next_node_visit_count: int = field(default=0, init=False)
|
|
34
|
+
"""Next number of node visits."""
|
|
35
|
+
regret_minimizers: dict[str, ProbabilitySimplexRegretMinimizer] = field(
|
|
36
|
+
default_factory=dict,
|
|
37
|
+
init=False,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
def regret_minimizer(self, node):
|
|
41
|
+
"""Return the regret minimizer for the information set of a node.
|
|
42
|
+
|
|
43
|
+
:param node: Node.
|
|
44
|
+
:return: Regret minimizer.
|
|
45
|
+
"""
|
|
46
|
+
j = self.game.information_set(node)
|
|
47
|
+
|
|
48
|
+
if j not in self.regret_minimizers:
|
|
49
|
+
self.regret_minimizers[j] = self.regret_minimizer_type(
|
|
50
|
+
self.kernel,
|
|
51
|
+
len(self.game.actions(node)),
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
return self.regret_minimizers[j]
|
|
55
|
+
|
|
56
|
+
def average_action_probabilities(self, node):
|
|
57
|
+
"""Return the average action probabilities given a node.
|
|
58
|
+
|
|
59
|
+
:param node: Node.
|
|
60
|
+
:return: Average action probabilities.
|
|
61
|
+
"""
|
|
62
|
+
np = self.kernel.numpy
|
|
63
|
+
dtype = self.kernel.data_type
|
|
64
|
+
R = self.regret_minimizer(node)
|
|
65
|
+
ps = R.average_strategy
|
|
66
|
+
|
|
67
|
+
if np.isscalar(ps):
|
|
68
|
+
ps = np.full(R.dimension, 1 / R.dimension, dtype)
|
|
69
|
+
|
|
70
|
+
return ps
|
|
71
|
+
|
|
72
|
+
def _action_probabilities(self, h):
|
|
73
|
+
R = self.regret_minimizer(h)
|
|
74
|
+
ps = R.next_strategy
|
|
75
|
+
|
|
76
|
+
if ps is None:
|
|
77
|
+
ps = R.output()
|
|
78
|
+
|
|
79
|
+
return ps
|
|
80
|
+
|
|
81
|
+
def _external_sampling(self, i, us, h):
|
|
82
|
+
np = self.kernel.numpy
|
|
83
|
+
dtype = self.kernel.data_type
|
|
84
|
+
self.next_node_visit_count += 1
|
|
85
|
+
u = self.game.utility(h, i)
|
|
86
|
+
A = self.game.actions(h)
|
|
87
|
+
|
|
88
|
+
if A:
|
|
89
|
+
i_prime = self.game.player(h)
|
|
90
|
+
|
|
91
|
+
if i_prime is None:
|
|
92
|
+
ps = self.game.chance_probabilities(h)
|
|
93
|
+
else:
|
|
94
|
+
ps = self._action_probabilities(h)
|
|
95
|
+
|
|
96
|
+
if i_prime == i:
|
|
97
|
+
u_primes = []
|
|
98
|
+
|
|
99
|
+
for a in A:
|
|
100
|
+
h_prime = self.game.apply(h, a)
|
|
101
|
+
|
|
102
|
+
u_primes.append(self._external_sampling(i, us, h_prime))
|
|
103
|
+
|
|
104
|
+
j = self.game.information_set(h)
|
|
105
|
+
us[j] = np.array(u_primes, dtype)
|
|
106
|
+
u += us[j] @ ps
|
|
107
|
+
else:
|
|
108
|
+
a = sample(A, ps)
|
|
109
|
+
h_prime = self.game.apply(h, a)
|
|
110
|
+
u += self._external_sampling(i, us, h_prime)
|
|
111
|
+
|
|
112
|
+
return u
|
|
113
|
+
|
|
114
|
+
def _external_sampling2(self, player):
|
|
115
|
+
us = {}
|
|
116
|
+
|
|
117
|
+
self._external_sampling(player, us, self.game.root_node)
|
|
118
|
+
|
|
119
|
+
return us
|
|
120
|
+
|
|
121
|
+
def _outcome_sampling(self, i, us, h, p):
|
|
122
|
+
np = self.kernel.numpy
|
|
123
|
+
dtype = self.kernel.data_type
|
|
124
|
+
self.next_node_visit_count += 1
|
|
125
|
+
u = self.game.utility(h, i) / p
|
|
126
|
+
A = self.game.actions(h)
|
|
127
|
+
|
|
128
|
+
if A:
|
|
129
|
+
i_prime = self.game.player(h)
|
|
130
|
+
|
|
131
|
+
if i_prime is None:
|
|
132
|
+
ps = self.game.chance_probabilities(h)
|
|
133
|
+
elif i_prime == i:
|
|
134
|
+
ps = self.reference_strategy_profile(h)
|
|
135
|
+
else:
|
|
136
|
+
ps = self._action_probabilities(h)
|
|
137
|
+
|
|
138
|
+
k = sample(range(len(A)), ps)
|
|
139
|
+
a = A[k]
|
|
140
|
+
h_prime = self.game.apply(h, a)
|
|
141
|
+
p_prime = ps[k] * p
|
|
142
|
+
u_prime = ps[k] * self._outcome_sampling(i, us, h_prime, p_prime)
|
|
143
|
+
u += u_prime
|
|
144
|
+
|
|
145
|
+
if i_prime == i:
|
|
146
|
+
self.regret_minimizer(h)
|
|
147
|
+
|
|
148
|
+
j = self.game.information_set(h)
|
|
149
|
+
us[j] = np.zeros(len(A), dtype)
|
|
150
|
+
us[j][k] = u_prime
|
|
151
|
+
|
|
152
|
+
return u
|
|
153
|
+
|
|
154
|
+
def _outcome_sampling2(self, player):
|
|
155
|
+
us = {}
|
|
156
|
+
|
|
157
|
+
self._outcome_sampling(player, us, self.game.root_node, 1)
|
|
158
|
+
|
|
159
|
+
return us
|
|
160
|
+
|
|
161
|
+
def sample(self, player):
|
|
162
|
+
"""Sample.
|
|
163
|
+
|
|
164
|
+
:param player: Player.
|
|
165
|
+
:return: Utilities.
|
|
166
|
+
"""
|
|
167
|
+
self.next_sample_count += 1
|
|
168
|
+
|
|
169
|
+
if self.reference_strategy_profile is None:
|
|
170
|
+
us = self._external_sampling2(player)
|
|
171
|
+
else:
|
|
172
|
+
us = self._outcome_sampling2(player)
|
|
173
|
+
|
|
174
|
+
return us
|
|
175
|
+
|
|
176
|
+
def observe(self, utilities):
|
|
177
|
+
"""Observe utilities.
|
|
178
|
+
|
|
179
|
+
:param utilities: Utilities.
|
|
180
|
+
:return: ``None``.
|
|
181
|
+
"""
|
|
182
|
+
self.sample_count = self.next_sample_count
|
|
183
|
+
self.node_visit_count = self.next_node_visit_count
|
|
184
|
+
|
|
185
|
+
for j, u in utilities.items():
|
|
186
|
+
R = self.regret_minimizers[j]
|
|
187
|
+
|
|
188
|
+
if R.next_strategy is None:
|
|
189
|
+
R.output()
|
|
190
|
+
|
|
191
|
+
R.observe(u)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@dataclass
|
|
195
|
+
class MonteCarloCounterfactualRegretMinimization(
|
|
196
|
+
StochasticRegretMinimizer,
|
|
197
|
+
ABC,
|
|
198
|
+
):
|
|
199
|
+
"""Class for Monte Carlo counterfactual regret minimization (MCCFR)."""
|
|
200
|
+
regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer] = (
|
|
201
|
+
RegretMatching
|
|
202
|
+
)
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
from noregret.solvers.linear_programming import linear_programming
|
|
3
3
|
from noregret.solvers.regret_minimization import (
|
|
4
4
|
regret_minimization,
|
|
5
|
+
stochastic_regret_minimization,
|
|
5
6
|
symmetric_regret_minimization,
|
|
6
7
|
)
|
|
7
8
|
|
|
8
9
|
__all__ = (
|
|
9
10
|
'linear_programming',
|
|
10
11
|
'regret_minimization',
|
|
12
|
+
'stochastic_regret_minimization',
|
|
11
13
|
'symmetric_regret_minimization',
|
|
12
14
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Module for regret minimization."""
|
|
2
2
|
from collections.abc import Iterable, Mapping
|
|
3
|
-
from itertools import count
|
|
3
|
+
from itertools import count, repeat
|
|
4
4
|
|
|
5
5
|
from tqdm import tqdm
|
|
6
6
|
|
|
@@ -29,10 +29,6 @@ def regret_minimization(
|
|
|
29
29
|
:param progress_bar: Whether to show a progress bar.
|
|
30
30
|
:return: Average strategy profile.
|
|
31
31
|
"""
|
|
32
|
-
np = game.kernel.numpy
|
|
33
|
-
|
|
34
|
-
if len(regret_minimizers) != game.player_count:
|
|
35
|
-
raise ValueError('inconsistent number of regret minimizers')
|
|
36
32
|
|
|
37
33
|
def average_strategy_profile():
|
|
38
34
|
average_strategy_profile = []
|
|
@@ -45,6 +41,11 @@ def regret_minimization(
|
|
|
45
41
|
def exploitability():
|
|
46
42
|
return game.exploitability(*average_strategy_profile())
|
|
47
43
|
|
|
44
|
+
np = game.kernel.numpy
|
|
45
|
+
|
|
46
|
+
if len(regret_minimizers) != game.player_count:
|
|
47
|
+
raise ValueError('inconsistent number of regret minimizers')
|
|
48
|
+
|
|
48
49
|
if iteration_count is None or np.isposinf(iteration_count):
|
|
49
50
|
iterations = count()
|
|
50
51
|
else:
|
|
@@ -57,24 +58,24 @@ def regret_minimization(
|
|
|
57
58
|
elif isinstance(progress_bar, Iterable):
|
|
58
59
|
iterations = tqdm(iterations, *progress_bar)
|
|
59
60
|
|
|
60
|
-
|
|
61
|
+
sigma = []
|
|
61
62
|
|
|
62
63
|
for R in regret_minimizers:
|
|
63
|
-
|
|
64
|
+
sigma.append(R.output(prediction))
|
|
64
65
|
|
|
65
66
|
for t in iterations:
|
|
66
67
|
if alternation:
|
|
67
68
|
for i, R in enumerate(regret_minimizers):
|
|
68
|
-
R.observe(game.utility(i, *
|
|
69
|
+
R.observe(game.utility(i, *sigma[:i], *sigma[i + 1:]))
|
|
69
70
|
|
|
70
|
-
|
|
71
|
+
sigma[i] = R.output(prediction)
|
|
71
72
|
else:
|
|
72
|
-
|
|
73
|
+
us = game.utilities(*sigma)
|
|
73
74
|
|
|
74
|
-
for i, (R, u) in enumerate(zip(regret_minimizers,
|
|
75
|
+
for i, (R, u) in enumerate(zip(regret_minimizers, us)):
|
|
75
76
|
R.observe(u)
|
|
76
77
|
|
|
77
|
-
|
|
78
|
+
sigma[i] = R.output(prediction)
|
|
78
79
|
|
|
79
80
|
if not checkpoints or t in checkpoints:
|
|
80
81
|
if update is not None:
|
|
@@ -116,19 +117,18 @@ def symmetric_regret_minimization(
|
|
|
116
117
|
:param progress_bar: Whether to show a progress bar.
|
|
117
118
|
:return: Average strategy profile.
|
|
118
119
|
"""
|
|
119
|
-
np = game.kernel.numpy
|
|
120
|
-
|
|
121
|
-
if not game.is_symmetric:
|
|
122
|
-
raise ValueError('game is asymmetric')
|
|
123
|
-
|
|
124
|
-
R = regret_minimizer
|
|
125
120
|
|
|
126
121
|
def average_strategy_profile():
|
|
127
|
-
return [
|
|
122
|
+
return [regret_minimizer.average_strategy] * game.player_count
|
|
128
123
|
|
|
129
124
|
def exploitability():
|
|
130
125
|
return game.exploitability(*average_strategy_profile())
|
|
131
126
|
|
|
127
|
+
np = game.kernel.numpy
|
|
128
|
+
|
|
129
|
+
if not game.is_symmetric:
|
|
130
|
+
raise ValueError('game is asymmetric')
|
|
131
|
+
|
|
132
132
|
if iteration_count is None or np.isposinf(iteration_count):
|
|
133
133
|
iterations = count()
|
|
134
134
|
else:
|
|
@@ -141,12 +141,14 @@ def symmetric_regret_minimization(
|
|
|
141
141
|
elif isinstance(progress_bar, Iterable):
|
|
142
142
|
iterations = tqdm(iterations, *progress_bar)
|
|
143
143
|
|
|
144
|
-
|
|
144
|
+
sigma_1 = regret_minimizer.output(prediction)
|
|
145
145
|
|
|
146
146
|
for t in iterations:
|
|
147
|
-
|
|
147
|
+
u = game.utility(0, *repeat(sigma_1, game.player_count - 1))
|
|
148
148
|
|
|
149
|
-
|
|
149
|
+
regret_minimizer.observe(u)
|
|
150
|
+
|
|
151
|
+
sigma_1 = regret_minimizer.output(prediction)
|
|
150
152
|
|
|
151
153
|
if not checkpoints or t in checkpoints:
|
|
152
154
|
if update is not None:
|
|
@@ -164,3 +166,62 @@ def symmetric_regret_minimization(
|
|
|
164
166
|
break
|
|
165
167
|
|
|
166
168
|
return average_strategy_profile()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def stochastic_regret_minimization(
|
|
172
|
+
game,
|
|
173
|
+
regret_minimizer,
|
|
174
|
+
alternation=False,
|
|
175
|
+
sample_count=1000000,
|
|
176
|
+
checkpoints=(),
|
|
177
|
+
update=None,
|
|
178
|
+
progress_bar=True,
|
|
179
|
+
):
|
|
180
|
+
"""Solve a game using stochastic regret minimization.
|
|
181
|
+
|
|
182
|
+
:param game: Game.
|
|
183
|
+
:param regret_minimizer: Regret minimizer.
|
|
184
|
+
:param alternation: Whether to alternate, defaults to ``False``.
|
|
185
|
+
:param sample_count: Number of samples, defaults to ``1000000``.
|
|
186
|
+
:param checkpoints: Checkpoints.
|
|
187
|
+
:param update: Update.
|
|
188
|
+
:param progress_bar: Whether to show a progress bar.
|
|
189
|
+
:return: Average action probabilities.
|
|
190
|
+
"""
|
|
191
|
+
np = game.kernel.numpy
|
|
192
|
+
|
|
193
|
+
if sample_count is None or np.isposinf(sample_count):
|
|
194
|
+
samples = count()
|
|
195
|
+
else:
|
|
196
|
+
samples = range(sample_count)
|
|
197
|
+
|
|
198
|
+
if progress_bar is True:
|
|
199
|
+
samples = tqdm(samples)
|
|
200
|
+
elif isinstance(progress_bar, Mapping):
|
|
201
|
+
samples = tqdm(samples, **progress_bar)
|
|
202
|
+
elif isinstance(progress_bar, Iterable):
|
|
203
|
+
samples = tqdm(samples, *progress_bar)
|
|
204
|
+
|
|
205
|
+
for s in samples:
|
|
206
|
+
if alternation:
|
|
207
|
+
for i in range(game.player_count):
|
|
208
|
+
regret_minimizer.observe(regret_minimizer.sample(i))
|
|
209
|
+
else:
|
|
210
|
+
uss = []
|
|
211
|
+
|
|
212
|
+
for i in range(game.player_count):
|
|
213
|
+
uss.append(regret_minimizer.sample(i))
|
|
214
|
+
|
|
215
|
+
for us in uss:
|
|
216
|
+
regret_minimizer.observe(us)
|
|
217
|
+
|
|
218
|
+
if not checkpoints or s in checkpoints:
|
|
219
|
+
if update is not None:
|
|
220
|
+
status = update()
|
|
221
|
+
else:
|
|
222
|
+
status = False
|
|
223
|
+
|
|
224
|
+
if status:
|
|
225
|
+
break
|
|
226
|
+
|
|
227
|
+
return regret_minimizer.average_action_probabilities
|
|
@@ -197,6 +197,19 @@ class BlackBoxGameTestCase(TestCase):
|
|
|
197
197
|
|
|
198
198
|
np.testing.assert_equal(ps, ps2)
|
|
199
199
|
|
|
200
|
+
def test_exploitability(self):
|
|
201
|
+
for game in self.GAMES:
|
|
202
|
+
sigma = nr.UniformStrategyProfile(self.KER, game)
|
|
203
|
+
epsilon = game.exploitability(sigma)
|
|
204
|
+
|
|
205
|
+
game = nr.to_efg(self.KER, game)
|
|
206
|
+
sfps = game.sequence_form_polytopes
|
|
207
|
+
bs = [sfp.behavioral_form_uniform_strategy for sfp in sfps]
|
|
208
|
+
sigma = [sfp.to_sequence_form(b) for sfp, b in zip(sfps, bs)]
|
|
209
|
+
epsilon2 = game.exploitability(*sigma)
|
|
210
|
+
|
|
211
|
+
self.assertAlmostEqual(epsilon, epsilon2)
|
|
212
|
+
|
|
200
213
|
|
|
201
214
|
if __name__ == '__main__':
|
|
202
215
|
main() # pragma: no cover
|
|
@@ -23,12 +23,12 @@ class LinearProgrammingTestCase(TestCase):
|
|
|
23
23
|
|
|
24
24
|
for game, value in self.GAME_VALUES:
|
|
25
25
|
x, y = nr.lp(game)
|
|
26
|
-
|
|
26
|
+
epsilon = game.exploitability(x, y)
|
|
27
27
|
v = game.expected_row_utility(x, y)
|
|
28
28
|
|
|
29
|
-
self.assertAlmostEqual(
|
|
29
|
+
self.assertAlmostEqual(epsilon, 0)
|
|
30
30
|
self.assertAlmostEqual(v, value)
|
|
31
|
-
self.assertEqual(
|
|
31
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
32
32
|
self.assertEqual(v.dtype, dtype)
|
|
33
33
|
|
|
34
34
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from functools import partial
|
|
2
2
|
from math import inf
|
|
3
|
+
from random import seed
|
|
3
4
|
from unittest import main, TestCase
|
|
4
5
|
|
|
5
6
|
import noregret as nr
|
|
@@ -44,12 +45,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
|
|
|
44
45
|
target_exploitability=self.TARGET_EXPLOITABILITY,
|
|
45
46
|
progress_bar=False,
|
|
46
47
|
)
|
|
47
|
-
|
|
48
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
48
49
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
49
50
|
|
|
50
|
-
self.assertLess(
|
|
51
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
51
52
|
self.assertAlmostEqual(v, value, delta=self.DELTA)
|
|
52
|
-
self.assertEqual(
|
|
53
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
53
54
|
self.assertEqual(v.dtype, dtype)
|
|
54
55
|
|
|
55
56
|
def test_last_iterate_convergence(self):
|
|
@@ -70,12 +71,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
|
|
|
70
71
|
target_exploitability=self.TARGET_EXPLOITABILITY,
|
|
71
72
|
progress_bar=False,
|
|
72
73
|
)
|
|
73
|
-
|
|
74
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
74
75
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
75
76
|
|
|
76
|
-
self.assertLess(
|
|
77
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
77
78
|
self.assertAlmostEqual(v, value, delta=self.DELTA)
|
|
78
|
-
self.assertEqual(
|
|
79
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
79
80
|
self.assertEqual(v.dtype, dtype)
|
|
80
81
|
|
|
81
82
|
def test_frequent_iterate_convergence(self):
|
|
@@ -94,12 +95,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
|
|
|
94
95
|
target_exploitability=self.TARGET_EXPLOITABILITY,
|
|
95
96
|
progress_bar=False,
|
|
96
97
|
)
|
|
97
|
-
|
|
98
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
98
99
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
99
100
|
|
|
100
|
-
self.assertLess(
|
|
101
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
101
102
|
self.assertAlmostEqual(v, value, delta=self.DELTA)
|
|
102
|
-
self.assertEqual(
|
|
103
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
103
104
|
self.assertEqual(v.dtype, dtype)
|
|
104
105
|
|
|
105
106
|
|
|
@@ -141,12 +142,12 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
|
|
|
141
142
|
target_exploitability=self.TARGET_EXPLOITABILITY,
|
|
142
143
|
progress_bar=False,
|
|
143
144
|
)
|
|
144
|
-
|
|
145
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
145
146
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
146
147
|
|
|
147
|
-
self.assertLess(
|
|
148
|
+
self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
|
|
148
149
|
self.assertAlmostEqual(v, value, delta=self.DELTA)
|
|
149
|
-
self.assertEqual(
|
|
150
|
+
self.assertEqual(epsilon.dtype, dtype)
|
|
150
151
|
self.assertEqual(v.dtype, dtype)
|
|
151
152
|
|
|
152
153
|
|
|
@@ -172,7 +173,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
|
172
173
|
nr.CFR(self.KER, game.column_sequence_form_polytope),
|
|
173
174
|
progress_bar=False,
|
|
174
175
|
)
|
|
175
|
-
|
|
176
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
176
177
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
177
178
|
x_bar2, y_bar2 = nr.rm(
|
|
178
179
|
game,
|
|
@@ -180,10 +181,10 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
|
180
181
|
nr.CFR2(self.KER, game.column_sequence_form_polytope),
|
|
181
182
|
progress_bar=False,
|
|
182
183
|
)
|
|
183
|
-
|
|
184
|
+
epsilon2 = game.exploitability(x_bar2, y_bar2)
|
|
184
185
|
v2 = game.expected_row_utility(x_bar2, y_bar2)
|
|
185
186
|
|
|
186
|
-
self.assertAlmostEqual(
|
|
187
|
+
self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
|
|
187
188
|
self.assertAlmostEqual(v, v2, self.PLACES)
|
|
188
189
|
|
|
189
190
|
x_bar, y_bar = nr.rm(
|
|
@@ -193,7 +194,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
|
193
194
|
prediction=True,
|
|
194
195
|
progress_bar=False,
|
|
195
196
|
)
|
|
196
|
-
|
|
197
|
+
epsilon = game.exploitability(x_bar, y_bar)
|
|
197
198
|
v = game.expected_row_utility(x_bar, y_bar)
|
|
198
199
|
x_bar2, y_bar2 = nr.rm(
|
|
199
200
|
game,
|
|
@@ -202,12 +203,61 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
|
|
|
202
203
|
prediction=True,
|
|
203
204
|
progress_bar=False,
|
|
204
205
|
)
|
|
205
|
-
|
|
206
|
+
epsilon2 = game.exploitability(x_bar2, y_bar2)
|
|
206
207
|
v2 = game.expected_row_utility(x_bar2, y_bar2)
|
|
207
208
|
|
|
208
|
-
self.assertAlmostEqual(
|
|
209
|
+
self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
|
|
209
210
|
self.assertAlmostEqual(v, v2, self.PLACES)
|
|
210
211
|
|
|
211
212
|
|
|
213
|
+
class StochasticRegretMinimizationTestCase(TestCase):
    """Check that ``nr.stochastic_rm`` with ``nr.MCCFR`` solves Kuhn poker.

    Every test seeds the global ``random`` generator with ``SEED``, runs
    ``SAMPLE_COUNT`` samples with alternation enabled, and requires the
    exploitability of the returned strategy profile to be below
    ``TARGET_EXPLOITABILITY``.
    """

    KER = nr.FPKer()
    GAME = nr.open_spiel_game(KER, 'kuhn_poker')
    SAMPLE_COUNT = 100000
    TARGET_EXPLOITABILITY = 1e-1
    SEED = 42

    def setUp(self):
        """Verify the game preconditions and reseed before each test."""
        # ``assertTrue`` rather than a bare ``assert``: the latter is
        # stripped when Python runs with ``-O``, silently skipping the
        # precondition check.
        self.assertTrue(self.GAME.is_two_player)
        self.assertTrue(self.GAME.is_zero_sum)

        # Seed before the regret minimizer is built so that the whole
        # run starts from the same generator state every time.
        seed(self.SEED)

    def _assert_low_exploitability(self, R):
        """Run ``R`` on the game and check the resulting exploitability.

        :param R: The stochastic regret minimizer to be run.
        :return: ``None``.
        """
        sigma = nr.stochastic_rm(
            self.GAME,
            R,
            alternation=True,
            sample_count=self.SAMPLE_COUNT,
            progress_bar=False,
        )
        epsilon = self.GAME.exploitability(sigma)

        self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)

    def test_external_sampling(self):
        """Test ``MCCFR`` built without a reference strategy profile.

        NOTE(review): presumably this configuration is what selects
        external sampling, as the test name suggests -- confirm against
        ``nr.MCCFR``.
        """
        R = nr.MCCFR(self.KER, self.GAME)

        self._assert_low_exploitability(R)

    def test_outcome_sampling(self):
        """Test ``MCCFR`` built with a uniform reference strategy profile.

        NOTE(review): presumably supplying a reference strategy profile
        is what selects outcome sampling, as the test name suggests --
        confirm against ``nr.MCCFR``.
        """
        R = nr.MCCFR(
            self.KER,
            self.GAME,
            reference_strategy_profile=nr.UniformStrategyProfile(
                self.KER,
                self.GAME,
            ),
        )

        self._assert_low_exploitability(R)
|
|
260
|
+
|
|
261
|
+
|
|
212
262
|
if __name__ == '__main__':
|
|
213
263
|
main() # pragma: no cover
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Module for utilities."""
|
|
2
2
|
from importlib import import_module
|
|
3
|
+
from random import choices
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
def import_object(object_path):
|
|
@@ -34,3 +35,16 @@ def tuple_or_none(values):
|
|
|
34
35
|
:return: Tuple or ``None``.
|
|
35
36
|
"""
|
|
36
37
|
return None if values is None else tuple(values)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def sample(values, probabilities):
    """Sample a random value as per the probabilities.

    >>> sample(range(5), [0, 0, 1, 0, 0])
    2
    >>> sample(iter('abc'), [0, 1, 0])
    'b'

    :param values: Values to be sampled from. Any iterable is accepted;
                   one that cannot be both measured and indexed (e.g. a
                   generator, an iterator, or a set) is first copied
                   into a tuple.
    :param probabilities: The probabilities of sampling each value, one
                          per value and in the same order. They are
                          handed to :func:`random.choices` as relative
                          weights.
    :return: The sampled value.
    :raises ValueError: If the number of probabilities differs from the
                        number of values (raised by
                        :func:`random.choices`).
    """
    # ``random.choices`` calls ``len()`` on the population and indexes
    # into it, so anything lacking either capability is materialized.
    # Sequences (lists, tuples, ranges, arrays) are passed through
    # untouched to avoid a needless copy.
    if not (hasattr(values, '__len__') and hasattr(values, '__getitem__')):
        values = tuple(values)

    return choices(values, probabilities)[0]
|
|
@@ -31,6 +31,7 @@ noregret/regret_minimizers/__init__.py
|
|
|
31
31
|
noregret/regret_minimizers/probability_simplices.py
|
|
32
32
|
noregret/regret_minimizers/regret_minimizers.py
|
|
33
33
|
noregret/regret_minimizers/sequence_form_polytopes.py
|
|
34
|
+
noregret/regret_minimizers/stochastic.py
|
|
34
35
|
noregret/solvers/__init__.py
|
|
35
36
|
noregret/solvers/linear_programming.py
|
|
36
37
|
noregret/solvers/regret_minimization.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/battle-of-the-sexes.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-scissors-plus.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-scissors.json
RENAMED
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-superscissors.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/probability_simplices.py
RENAMED
|
File without changes
|
|
File without changes
|
{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/sequence_form_polytopes.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|