noregret 0.0.0.dev8__tar.gz → 0.0.0.dev9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/PKG-INFO +1 -1
  2. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/__init__.py +20 -1
  3. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/__init__.py +8 -1
  4. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/black_box.py +58 -9
  5. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/extensive_form.py +5 -5
  6. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/__init__.py +6 -0
  7. noregret-0.0.0.dev9/noregret/regret_minimizers/stochastic.py +202 -0
  8. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/solvers/__init__.py +2 -0
  9. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/solvers/regret_minimization.py +83 -22
  10. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_games.py +13 -0
  11. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_linear_programming.py +3 -3
  12. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_regret_minimization.py +68 -18
  13. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/utilities.py +14 -0
  14. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/PKG-INFO +1 -1
  15. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/SOURCES.txt +1 -0
  16. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/setup.py +1 -1
  17. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/LICENSE +0 -0
  18. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/README.rst +0 -0
  19. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/assurance-game.json +0 -0
  20. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/battle-of-the-sexes.json +0 -0
  21. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/chicken.json +0 -0
  22. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/gift-exchange-game.json +0 -0
  23. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/matching-pennies.json +0 -0
  24. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/prisoners-dilemma.json +0 -0
  25. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/pure-coordination.json +0 -0
  26. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-scissors-plus.json +0 -0
  27. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-scissors.json +0 -0
  28. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/rock-paper-superscissors.json +0 -0
  29. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/examples/stag-hunt.json +0 -0
  30. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/games.py +0 -0
  31. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/multilinear.py +0 -0
  32. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/normal_form.py +0 -0
  33. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/kernels.py +0 -0
  34. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/probability_simplices.py +0 -0
  35. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/regret_minimizers.py +0 -0
  36. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/sequence_form_polytopes.py +0 -0
  37. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/sequence_form_polytopes.py +0 -0
  38. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/solvers/linear_programming.py +0 -0
  39. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/__init__.py +0 -0
  40. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_sequence_form_polytopes.py +0 -0
  41. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/dependency_links.txt +0 -0
  42. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/requires.txt +0 -0
  43. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/top_level.txt +0 -0
  44. {noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: noregret
3
- Version: 0.0.0.dev8
3
+ Version: 0.0.0.dev9
4
4
  Summary: No-regret learning dynamics
5
5
  Home-page: https://github.com/uoftcprg/noregret
6
6
  Author: Universal, Open, Free, and Transparent Computer Poker Research Group
@@ -18,6 +18,7 @@ from noregret.games import (
18
18
  RockPaperScissorsPlus,
19
19
  RockPaperSuperscissors,
20
20
  StagHunt,
21
+ StrategyProfile,
21
22
  to_extensive_form_game,
22
23
  TwoPlayerExtensiveFormGame,
23
24
  TwoPlayerGame,
@@ -27,6 +28,7 @@ from noregret.games import (
27
28
  TwoPlayerZeroSumGame,
28
29
  TwoPlayerZeroSumMultilinearGame,
29
30
  TwoPlayerZeroSumNormalFormGame,
31
+ UniformStrategyProfile,
30
32
  )
31
33
  from noregret.kernels import (
32
34
  CUDAKernel,
@@ -46,6 +48,7 @@ from noregret.regret_minimizers import (
46
48
  EuclideanRegularization,
47
49
  FollowTheRegularizedLeader,
48
50
  MirrorDescent,
51
+ MonteCarloCounterfactualRegretMinimization,
49
52
  MultiplicativeWeightsUpdate,
50
53
  OnlineGradientDescent,
51
54
  ProbabilitySimplexRegretMinimizer,
@@ -54,15 +57,17 @@ from noregret.regret_minimizers import (
54
57
  RegretMatchingPlus,
55
58
  RegretMinimizer,
56
59
  SequenceFormPolytopeRegretMinimizer,
60
+ StochasticRegretMinimizer,
57
61
  SwapRegretMinimizer,
58
62
  )
59
63
  from noregret.sequence_form_polytopes import SequenceFormPolytope
60
64
  from noregret.solvers import (
61
65
  linear_programming,
62
66
  regret_minimization,
67
+ stochastic_regret_minimization,
63
68
  symmetric_regret_minimization,
64
69
  )
65
- from noregret.utilities import import_object, tuple_or_none
70
+ from noregret.utilities import import_object, sample, tuple_or_none
66
71
 
67
72
  BM = BlumMansour
68
73
  """Alias for :class:`noregret.BlumMansour`."""
@@ -94,6 +99,10 @@ FTRL = FollowTheRegularizedLeader
94
99
  """Alias for :class:`noregret.FollowTheRegularizedLeader`."""
95
100
  lp = linear_programming
96
101
  """Alias for :func:`noregret.linear_programming`."""
102
+ MCCFR = MonteCarloCounterfactualRegretMinimization
103
+ """Alias for
104
+ :class:`noregret.MonteCarloCounterfactualRegretMinimization`.
105
+ """
97
106
  MD = MirrorDescent
98
107
  """Alias for :class:`noregret.MirrorDescent`."""
99
108
  MWU = MultiplicativeWeightsUpdate
@@ -112,6 +121,8 @@ RM = RegretMatching
112
121
  """Alias for :class:`noregret.RegretMatching`."""
113
122
  rm = regret_minimization
114
123
  """Alias for :func:`noregret.regret_minimization`."""
124
+ stochastic_rm = stochastic_regret_minimization
125
+ """Alias for :func:`noregret.stochastic_regret_minimization`."""
115
126
  symmetric_rm = symmetric_regret_minimization
116
127
  """Alias for :func:`noregret.symmetric_regret_minimization`."""
117
128
  to_efg = to_extensive_form_game
@@ -155,8 +166,10 @@ __all__ = (
155
166
  'lp',
156
167
  'MatchingPennies',
157
168
  'matrix_game',
169
+ 'MCCFR',
158
170
  'MD',
159
171
  'MirrorDescent',
172
+ 'MonteCarloCounterfactualRegretMinimization',
160
173
  'MultilinearGame',
161
174
  'MultiplicativeWeightsUpdate',
162
175
  'MWU',
@@ -181,10 +194,15 @@ __all__ = (
181
194
  'RockPaperScissors',
182
195
  'RockPaperScissorsPlus',
183
196
  'RockPaperSuperscissors',
197
+ 'sample',
184
198
  'SequenceFormPolytope',
185
199
  'SequenceFormPolytopeRegretMinimizer',
186
200
  'Serializable',
187
201
  'StagHunt',
202
+ 'stochastic_regret_minimization',
203
+ 'StochasticRegretMinimizer',
204
+ 'stochastic_rm',
205
+ 'StrategyProfile',
188
206
  'SwapRegretMinimizer',
189
207
  'symmetric_regret_minimization',
190
208
  'symmetric_rm',
@@ -199,4 +217,5 @@ __all__ = (
199
217
  'TwoPlayerZeroSumGame',
200
218
  'TwoPlayerZeroSumMultilinearGame',
201
219
  'TwoPlayerZeroSumNormalFormGame',
220
+ 'UniformStrategyProfile',
202
221
  )
@@ -1,5 +1,10 @@
1
1
  """Module for games."""
2
- from noregret.games.black_box import BlackBoxGame, open_spiel_game
2
+ from noregret.games.black_box import (
3
+ BlackBoxGame,
4
+ open_spiel_game,
5
+ StrategyProfile,
6
+ UniformStrategyProfile,
7
+ )
3
8
  from noregret.games.extensive_form import (
4
9
  ExtensiveFormGame,
5
10
  to_extensive_form_game,
@@ -49,6 +54,7 @@ __all__ = (
49
54
  'RockPaperScissorsPlus',
50
55
  'RockPaperSuperscissors',
51
56
  'StagHunt',
57
+ 'StrategyProfile',
52
58
  'to_extensive_form_game',
53
59
  'TwoPlayerExtensiveFormGame',
54
60
  'TwoPlayerGame',
@@ -58,4 +64,5 @@ __all__ = (
58
64
  'TwoPlayerZeroSumGame',
59
65
  'TwoPlayerZeroSumMultilinearGame',
60
66
  'TwoPlayerZeroSumNormalFormGame',
67
+ 'UniformStrategyProfile',
61
68
  )
@@ -4,7 +4,7 @@ from dataclasses import dataclass, field
4
4
  from functools import partial
5
5
 
6
6
  from ordered_set import OrderedSet
7
- from pyspiel import GameType, load_game
7
+ from pyspiel import exploitability, GameType, load_game
8
8
 
9
9
  from noregret.kernels import Kernel
10
10
 
@@ -139,6 +139,12 @@ class BlackBoxGame(ABC):
139
139
 
140
140
  return np.array(ps, dtype)
141
141
 
142
def exploitability(self, strategy_profile):
    """Return the exploitability of a strategy profile.

    This base implementation only validates the kind of game; it never
    produces a value itself.

    :param strategy_profile: Strategy profile.
    :return: Nothing; this implementation always raises.
    :raises ValueError: If the game is not two-player zero-sum.
    :raises NotImplementedError: If the game is two-player zero-sum.
    """
    if not (self.is_two_player and self.is_zero_sum):
        raise ValueError('not 2p0s')

    raise NotImplementedError
147
+
142
148
 
143
149
  @dataclass
144
150
  class _OpenSpielBlackBoxGame(BlackBoxGame):
@@ -167,17 +173,14 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
167
173
  return node.child(node.string_to_action(action))
168
174
 
169
175
def children(self, node):
    """Return the child nodes of a node, one per legal action.

    :param node: Node.
    :return: List of children, in the order of the node's legal actions.
    """
    return [node.child(action) for action in node.legal_actions()]
171
177
 
172
178
def actions_and_children(self, node):
    """Return the action labels and the child nodes of a node.

    The legal actions are queried once; their string forms are gathered
    first and the children are built afterwards.

    :param node: Node.
    :return: Pair of an ordered set of action strings and a list of
             children.
    """
    legal_actions = node.legal_actions()
    labels = OrderedSet(node.action_to_string(a) for a in legal_actions)
    successors = [node.child(a) for a in legal_actions]

    return labels, successors
181
184
 
182
185
  def player(self, node):
183
186
  i = node.current_player()
@@ -212,6 +215,27 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
212
215
 
213
216
  return np.array([p for _, p in node.chance_outcomes()], dtype)
214
217
 
218
def _sigma(self, strategy_profile, h, sigma):
    """Collect action probabilities for the subtree rooted at a node.

    For every node that has legal actions and a non-``None`` player,
    the strategy profile is queried once per information set (the first
    time that set is reached) and stored as ``(action, probability)``
    pairs. All children are then visited recursively.

    :param strategy_profile: Callable mapping a node to an array of
                             action probabilities.
    :param h: Node.
    :param sigma: Dictionary filled in place, keyed by information set.
    :return: ``None``.
    """
    actions = h.legal_actions()
    successors = [h.child(a) for a in actions]
    acting_player = self.player(h)

    if actions and acting_player is not None:
        information_set = self.information_set(h)

        if information_set not in sigma:
            probabilities = strategy_profile(h).tolist()
            sigma[information_set] = list(zip(actions, probabilities))

    for successor in successors:
        self._sigma(strategy_profile, successor, sigma)
228
+
229
def _sigma2(self, strategy_profile):
    """Collect action probabilities for the whole game tree.

    :param strategy_profile: Callable mapping a node to an array of
                             action probabilities.
    :return: Dictionary keyed by information set, as filled in by
             ``_sigma`` starting from the root node.
    """
    collected = {}
    self._sigma(strategy_profile, self.root_node, collected)

    return collected
235
+
236
def exploitability(self, strategy_profile):
    """Return the exploitability of a strategy profile.

    The profile is first tabulated per information set and then handed,
    together with the wrapped game, to the ``exploitability`` function
    imported from ``pyspiel``.

    :param strategy_profile: Callable mapping a node to an array of
                             action probabilities.
    :return: Exploitability as computed by ``pyspiel``.
    """
    wrapped_game = self._game
    tabulated = self._sigma2(strategy_profile)

    return exploitability(wrapped_game, tabulated)
238
+
215
239
 
216
240
  def open_spiel_game(kernel, game):
217
241
  """Load a game from OpenSpiel.
@@ -221,3 +245,28 @@ def open_spiel_game(kernel, game):
221
245
  :return: Game.
222
246
  """
223
247
  return _OpenSpielBlackBoxGame(kernel, game)
248
+
249
+
250
@dataclass
class StrategyProfile(ABC):
    """Abstract base class for strategy profiles.

    A strategy profile is a callable bound to a kernel and a game;
    subclasses define what is returned for a given node.
    """
    kernel: Kernel
    """Kernel."""
    game: BlackBoxGame
    """Game."""

    @abstractmethod
    def __call__(self, node):
        """Evaluate the strategy profile at a node.

        :param node: Node.
        :return: Defined by the subclass.
        """
261
+
262
+
263
@dataclass
class UniformStrategyProfile(StrategyProfile):
    """Class for uniform strategy profiles."""

    def __call__(self, node):
        """Return equal probabilities for all actions at a node.

        The node must have at least one action; with none, the division
        below raises ``ZeroDivisionError``.

        :param node: Node.
        :return: Array holding one equal probability per action.
        """
        xp = self.kernel.numpy
        data_type = self.kernel.data_type
        action_count = len(self.game.actions(node))
        uniform_probability = 1 / action_count

        return xp.full(action_count, uniform_probability, data_type)
@@ -178,9 +178,9 @@ def _nfg2efg(ker, game, decision_points='p{}'.format):
178
178
  payoffs = scipy.sparse.csr_array(payoffs)
179
179
  sfps = []
180
180
 
181
- for i, A_j in enumerate(game.actions):
181
+ for i, A in enumerate(game.actions):
182
182
  j = decision_points(i)
183
- sfp = SequenceFormPolytope(ker, {j: A_j}, {j: None})
183
+ sfp = SequenceFormPolytope(ker, {j: A}, {j: None})
184
184
 
185
185
  sfps.append(sfp)
186
186
 
@@ -198,11 +198,11 @@ def _bbg2efg(ker, game):
198
198
  raw_payoffs = defaultdict(int)
199
199
 
200
200
  def dfs(h, p, seqs, us):
201
- A_j, h_primes = game.actions_and_children(h)
201
+ A, h_primes = game.actions_and_children(h)
202
202
  i = game.player(h)
203
203
  us = us + game.utilities(h)
204
204
 
205
- if not A_j:
205
+ if not A:
206
206
  raw_payoffs[tuple(seqs)] += p * us
207
207
  elif i is None:
208
208
  p_primes = game.chance_probabilities(h)
@@ -214,7 +214,7 @@ def _bbg2efg(ker, game):
214
214
  p_j = seqs[i]
215
215
  p_js[i][j] = p_j
216
216
 
217
- for a, h_prime in zip(A_j, h_primes):
217
+ for a, h_prime in zip(A, h_primes):
218
218
  next_seqs = seqs.copy()
219
219
  next_seqs[i] = j, a
220
220
 
@@ -24,6 +24,10 @@ from noregret.regret_minimizers.sequence_form_polytopes import (
24
24
  DiscountedCounterfactualRegretMinimization,
25
25
  SequenceFormPolytopeRegretMinimizer,
26
26
  )
27
+ from noregret.regret_minimizers.stochastic import (
28
+ MonteCarloCounterfactualRegretMinimization,
29
+ StochasticRegretMinimizer,
30
+ )
27
31
 
28
32
  __all__ = (
29
33
  'BlumMansour',
@@ -36,6 +40,7 @@ __all__ = (
36
40
  'EuclideanRegularization',
37
41
  'FollowTheRegularizedLeader',
38
42
  'MirrorDescent',
43
+ 'MonteCarloCounterfactualRegretMinimization',
39
44
  'MultiplicativeWeightsUpdate',
40
45
  'OnlineGradientDescent',
41
46
  'ProbabilitySimplexRegretMinimizer',
@@ -44,5 +49,6 @@ __all__ = (
44
49
  'RegretMatchingPlus',
45
50
  'RegretMinimizer',
46
51
  'SequenceFormPolytopeRegretMinimizer',
52
+ 'StochasticRegretMinimizer',
47
53
  'SwapRegretMinimizer',
48
54
  )
@@ -0,0 +1,202 @@
1
+ """Module for stochastic regret minimizers."""
2
+ from abc import ABC
3
+ from collections.abc import Callable
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+ from noregret.games.black_box import BlackBoxGame
8
+ from noregret.kernels import Kernel
9
+ from noregret.regret_minimizers.probability_simplices import (
10
+ ProbabilitySimplexRegretMinimizer,
11
+ RegretMatching,
12
+ )
13
+ from noregret.utilities import sample
14
+
15
+
16
@dataclass
class StochasticRegretMinimizer(ABC):
    """Abstract base class for stochastic regret minimizers.

    One regret minimizer is kept per information set and is created the
    first time a node of that information set is queried.
    :meth:`sample` traverses the game tree for one player and returns
    utility vectors keyed by information set; :meth:`observe` feeds such
    vectors to the matching regret minimizers. External sampling is used
    when no reference strategy profile is set, outcome sampling
    otherwise.
    """
    kernel: Kernel
    """Kernel."""
    game: BlackBoxGame
    """Game."""
    regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer]
    """Regret minimizer type."""
    reference_strategy_profile: Callable[[Any], Any] | None = None
    """Reference strategy profile."""
    # Copied from ``next_sample_count`` each time ``observe`` is called.
    sample_count: int = field(default=0, init=False)
    """number of samples."""
    # Incremented by one on every call to ``sample``.
    next_sample_count: int = field(default=0, init=False)
    """Next number of samples."""
    # Copied from ``next_node_visit_count`` each time ``observe`` is called.
    node_visit_count: int = field(default=0, init=False)
    """Number of node visits."""
    # Incremented by one for every node entered during a traversal.
    next_node_visit_count: int = field(default=0, init=False)
    """Next number of node visits."""
    # Regret minimizers created so far, keyed by information set.
    regret_minimizers: dict[str, ProbabilitySimplexRegretMinimizer] = field(
        default_factory=dict,
        init=False,
    )

    def regret_minimizer(self, node):
        """Return the regret minimizer of the information set of a node.

        If none exists yet for that information set, one is constructed
        from the kernel and the number of actions at the node, and is
        stored for later calls.

        :param node: Node.
        :return: Regret minimizer.
        """
        j = self.game.information_set(node)

        if j not in self.regret_minimizers:
            self.regret_minimizers[j] = self.regret_minimizer_type(
                self.kernel,
                len(self.game.actions(node)),
            )

        return self.regret_minimizers[j]

    def average_action_probabilities(self, node):
        """Return the average action probabilities given a node.

        :param node: Node.
        :return: Average action probabilities.
        """
        np = self.kernel.numpy
        dtype = self.kernel.data_type
        R = self.regret_minimizer(node)
        ps = R.average_strategy

        # A scalar average strategy is replaced by the uniform
        # distribution. NOTE(review): presumably the scalar is what the
        # regret minimizer reports before any observation - confirm.
        if np.isscalar(ps):
            ps = np.full(R.dimension, 1 / R.dimension, dtype)

        return ps

    def _action_probabilities(self, h):
        """Return the current action probabilities at a node.

        Uses the regret minimizer's ``next_strategy`` and, when that is
        ``None``, the result of calling its ``output`` method.

        :param h: Node.
        :return: Action probabilities.
        """
        R = self.regret_minimizer(h)
        ps = R.next_strategy

        if ps is None:
            ps = R.output()

        return ps

    def _external_sampling(self, i, us, h):
        """Traverse the subtree of a node using external sampling.

        At nodes of player ``i`` every action is explored; at every
        other node with actions a single action is sampled.

        :param i: Player for whom utilities are collected.
        :param us: Dictionary filled in place, mapping each visited
                   information set of player ``i`` to the array of
                   values returned for its actions.
        :param h: Node.
        :return: Utility of player ``i`` at the node plus the value
                 obtained from the subtree below it.
        """
        np = self.kernel.numpy
        dtype = self.kernel.data_type
        self.next_node_visit_count += 1
        u = self.game.utility(h, i)
        A = self.game.actions(h)

        if A:
            i_prime = self.game.player(h)

            # A player of ``None`` is treated as a chance node.
            if i_prime is None:
                ps = self.game.chance_probabilities(h)
            else:
                ps = self._action_probabilities(h)

            if i_prime == i:
                u_primes = []

                for a in A:
                    h_prime = self.game.apply(h, a)

                    u_primes.append(self._external_sampling(i, us, h_prime))

                j = self.game.information_set(h)
                us[j] = np.array(u_primes, dtype)
                # Expected value of the node under the current strategy.
                u += us[j] @ ps
            else:
                a = sample(A, ps)
                h_prime = self.game.apply(h, a)
                u += self._external_sampling(i, us, h_prime)

        return u

    def _external_sampling2(self, player):
        """Run one external-sampling traversal from the root node.

        :param player: Player.
        :return: Dictionary of utilities keyed by information set.
        """
        us = {}

        self._external_sampling(player, us, self.game.root_node)

        return us

    def _outcome_sampling(self, i, us, h, p):
        """Traverse a single sampled path using outcome sampling.

        Exactly one action is sampled at every node with actions: from
        the chance probabilities at chance nodes, from the reference
        strategy profile at nodes of player ``i``, and from the current
        action probabilities at all other nodes.

        :param i: Player for whom utilities are collected.
        :param us: Dictionary filled in place, mapping each visited
                   information set of player ``i`` to an array that is
                   zero except at the sampled action.
        :param h: Node.
        :param p: Product of the probabilities of the actions sampled
                  on the path from the root to this node.
        :return: Utility of player ``i`` at the node divided by ``p``,
                 plus the weighted value of the sampled child.
        """
        np = self.kernel.numpy
        dtype = self.kernel.data_type
        self.next_node_visit_count += 1
        u = self.game.utility(h, i) / p
        A = self.game.actions(h)

        if A:
            i_prime = self.game.player(h)

            # A player of ``None`` is treated as a chance node.
            if i_prime is None:
                ps = self.game.chance_probabilities(h)
            elif i_prime == i:
                ps = self.reference_strategy_profile(h)
            else:
                ps = self._action_probabilities(h)

            # Sample an index rather than an action so that the same
            # position can be used in both ``A`` and ``ps``.
            k = sample(range(len(A)), ps)
            a = A[k]
            h_prime = self.game.apply(h, a)
            p_prime = ps[k] * p
            u_prime = ps[k] * self._outcome_sampling(i, us, h_prime, p_prime)
            u += u_prime

            if i_prime == i:
                # Ensure a regret minimizer exists for this information
                # set, since ``observe`` looks it up by key.
                self.regret_minimizer(h)

                j = self.game.information_set(h)
                us[j] = np.zeros(len(A), dtype)
                us[j][k] = u_prime

        return u

    def _outcome_sampling2(self, player):
        """Run one outcome-sampling traversal from the root node.

        The traversal starts with a path probability of ``1``.

        :param player: Player.
        :return: Dictionary of utilities keyed by information set.
        """
        us = {}

        self._outcome_sampling(player, us, self.game.root_node, 1)

        return us

    def sample(self, player):
        """Sample.

        Increments ``next_sample_count`` and runs one traversal for the
        player: external sampling if ``reference_strategy_profile`` is
        ``None``, outcome sampling otherwise.

        :param player: Player.
        :return: Utilities.
        """
        self.next_sample_count += 1

        if self.reference_strategy_profile is None:
            us = self._external_sampling2(player)
        else:
            us = self._outcome_sampling2(player)

        return us

    def observe(self, utilities):
        """Observe utilities.

        Publishes the pending sample and node-visit counters, then
        passes each utility vector to the regret minimizer stored under
        the same information set.

        :param utilities: Utilities.
        :return: ``None``.
        """
        self.sample_count = self.next_sample_count
        self.node_visit_count = self.next_node_visit_count

        for j, u in utilities.items():
            R = self.regret_minimizers[j]

            # ``output`` is called first whenever no next strategy is
            # pending. NOTE(review): presumably the regret minimizer
            # requires an output before each observation - confirm.
            if R.next_strategy is None:
                R.output()

            R.observe(u)
192
+
193
+
194
@dataclass
class MonteCarloCounterfactualRegretMinimization(
        StochasticRegretMinimizer,
        ABC,
):
    """Class for Monte Carlo counterfactual regret minimization (MCCFR).

    This is a stochastic regret minimizer whose regret minimizer type
    defaults to regret matching.
    """
    regret_minimizer_type: type[
        ProbabilitySimplexRegretMinimizer
    ] = RegretMatching
@@ -2,11 +2,13 @@
2
2
  from noregret.solvers.linear_programming import linear_programming
3
3
  from noregret.solvers.regret_minimization import (
4
4
  regret_minimization,
5
+ stochastic_regret_minimization,
5
6
  symmetric_regret_minimization,
6
7
  )
7
8
 
8
9
  __all__ = (
9
10
  'linear_programming',
10
11
  'regret_minimization',
12
+ 'stochastic_regret_minimization',
11
13
  'symmetric_regret_minimization',
12
14
  )
@@ -1,6 +1,6 @@
1
1
  """Module for regret minimization."""
2
2
  from collections.abc import Iterable, Mapping
3
- from itertools import count
3
+ from itertools import count, repeat
4
4
 
5
5
  from tqdm import tqdm
6
6
 
@@ -29,10 +29,6 @@ def regret_minimization(
29
29
  :param progress_bar: Whether to show a progress bar.
30
30
  :return: Average strategy profile.
31
31
  """
32
- np = game.kernel.numpy
33
-
34
- if len(regret_minimizers) != game.player_count:
35
- raise ValueError('inconsistent number of regret minimizers')
36
32
 
37
33
  def average_strategy_profile():
38
34
  average_strategy_profile = []
@@ -45,6 +41,11 @@ def regret_minimization(
45
41
  def exploitability():
46
42
  return game.exploitability(*average_strategy_profile())
47
43
 
44
+ np = game.kernel.numpy
45
+
46
+ if len(regret_minimizers) != game.player_count:
47
+ raise ValueError('inconsistent number of regret minimizers')
48
+
48
49
  if iteration_count is None or np.isposinf(iteration_count):
49
50
  iterations = count()
50
51
  else:
@@ -57,24 +58,24 @@ def regret_minimization(
57
58
  elif isinstance(progress_bar, Iterable):
58
59
  iterations = tqdm(iterations, *progress_bar)
59
60
 
60
- s = []
61
+ sigma = []
61
62
 
62
63
  for R in regret_minimizers:
63
- s.append(R.output(prediction))
64
+ sigma.append(R.output(prediction))
64
65
 
65
66
  for t in iterations:
66
67
  if alternation:
67
68
  for i, R in enumerate(regret_minimizers):
68
- R.observe(game.utility(i, *s[:i], *s[i + 1:]))
69
+ R.observe(game.utility(i, *sigma[:i], *sigma[i + 1:]))
69
70
 
70
- s[i] = R.output(prediction)
71
+ sigma[i] = R.output(prediction)
71
72
  else:
72
- U = game.utilities(*s)
73
+ us = game.utilities(*sigma)
73
74
 
74
- for i, (R, u) in enumerate(zip(regret_minimizers, U)):
75
+ for i, (R, u) in enumerate(zip(regret_minimizers, us)):
75
76
  R.observe(u)
76
77
 
77
- s[i] = R.output(prediction)
78
+ sigma[i] = R.output(prediction)
78
79
 
79
80
  if not checkpoints or t in checkpoints:
80
81
  if update is not None:
@@ -116,19 +117,18 @@ def symmetric_regret_minimization(
116
117
  :param progress_bar: Whether to show a progress bar.
117
118
  :return: Average strategy profile.
118
119
  """
119
- np = game.kernel.numpy
120
-
121
- if not game.is_symmetric:
122
- raise ValueError('game is asymmetric')
123
-
124
- R = regret_minimizer
125
120
 
126
121
  def average_strategy_profile():
127
- return [R.average_strategy] * game.player_count
122
+ return [regret_minimizer.average_strategy] * game.player_count
128
123
 
129
124
  def exploitability():
130
125
  return game.exploitability(*average_strategy_profile())
131
126
 
127
+ np = game.kernel.numpy
128
+
129
+ if not game.is_symmetric:
130
+ raise ValueError('game is asymmetric')
131
+
132
132
  if iteration_count is None or np.isposinf(iteration_count):
133
133
  iterations = count()
134
134
  else:
@@ -141,12 +141,14 @@ def symmetric_regret_minimization(
141
141
  elif isinstance(progress_bar, Iterable):
142
142
  iterations = tqdm(iterations, *progress_bar)
143
143
 
144
- s_neg_1 = [R.output(prediction)] * (game.player_count - 1)
144
+ sigma_1 = regret_minimizer.output(prediction)
145
145
 
146
146
  for t in iterations:
147
- R.observe(game.utility(0, *s_neg_1))
147
+ u = game.utility(0, *repeat(sigma_1, game.player_count - 1))
148
148
 
149
- s_neg_1 = [R.output(prediction)] * (game.player_count - 1)
149
+ regret_minimizer.observe(u)
150
+
151
+ sigma_1 = regret_minimizer.output(prediction)
150
152
 
151
153
  if not checkpoints or t in checkpoints:
152
154
  if update is not None:
@@ -164,3 +166,62 @@ def symmetric_regret_minimization(
164
166
  break
165
167
 
166
168
  return average_strategy_profile()
169
+
170
+
171
def stochastic_regret_minimization(
        game,
        regret_minimizer,
        alternation=False,
        sample_count=1000000,
        checkpoints=(),
        update=None,
        progress_bar=True,
):
    """Solve a game using stochastic regret minimization.

    Each iteration samples once for every player. With alternation, a
    player's sample is observed before the next player is sampled;
    without it, all players are sampled first and the samples are
    observed afterwards.

    :param game: Game.
    :param regret_minimizer: Regret minimizer.
    :param alternation: Whether to alternate, defaults to ``False``.
    :param sample_count: Number of samples, defaults to ``1000000``;
                         ``None`` or positive infinity runs until
                         ``update`` requests a stop.
    :param checkpoints: Iteration indices at which ``update`` is
                        called; if empty, it is called on every
                        iteration.
    :param update: Optional callable without arguments; a truthy return
                   value stops the loop.
    :param progress_bar: ``True`` to show a progress bar, a mapping of
                         keyword arguments or an iterable of positional
                         arguments to configure it, anything else to
                         disable it.
    :return: The ``average_action_probabilities`` attribute of the
             regret minimizer.
    """
    np = game.kernel.numpy
    unbounded = sample_count is None or np.isposinf(sample_count)
    samples = count() if unbounded else range(sample_count)

    if progress_bar is True:
        samples = tqdm(samples)
    elif isinstance(progress_bar, Mapping):
        samples = tqdm(samples, **progress_bar)
    elif isinstance(progress_bar, Iterable):
        samples = tqdm(samples, *progress_bar)

    for s in samples:
        players = range(game.player_count)

        if alternation:
            for i in players:
                regret_minimizer.observe(regret_minimizer.sample(i))
        else:
            sampled = [regret_minimizer.sample(i) for i in players]

            for us in sampled:
                regret_minimizer.observe(us)

        at_checkpoint = not checkpoints or s in checkpoints

        if at_checkpoint and update is not None and update():
            break

    return regret_minimizer.average_action_probabilities
@@ -197,6 +197,19 @@ class BlackBoxGameTestCase(TestCase):
197
197
 
198
198
  np.testing.assert_equal(ps, ps2)
199
199
 
200
def test_exploitability(self):
    """Compare black-box exploitability with the extensive-form one.

    For every game, the exploitability of the uniform strategy profile
    computed on the black-box game must match the one computed on its
    extensive-form conversion.
    """
    for black_box_game in self.GAMES:
        uniform_profile = nr.UniformStrategyProfile(self.KER, black_box_game)
        black_box_epsilon = black_box_game.exploitability(uniform_profile)

        efg = nr.to_efg(self.KER, black_box_game)
        polytopes = efg.sequence_form_polytopes
        behavioral = [p.behavioral_form_uniform_strategy for p in polytopes]
        sequence_form = [
            p.to_sequence_form(b) for p, b in zip(polytopes, behavioral)
        ]
        efg_epsilon = efg.exploitability(*sequence_form)

        self.assertAlmostEqual(black_box_epsilon, efg_epsilon)
212
+
200
213
 
201
214
  if __name__ == '__main__':
202
215
  main() # pragma: no cover
@@ -23,12 +23,12 @@ class LinearProgrammingTestCase(TestCase):
23
23
 
24
24
  for game, value in self.GAME_VALUES:
25
25
  x, y = nr.lp(game)
26
- e = game.exploitability(x, y)
26
+ epsilon = game.exploitability(x, y)
27
27
  v = game.expected_row_utility(x, y)
28
28
 
29
- self.assertAlmostEqual(e, 0)
29
+ self.assertAlmostEqual(epsilon, 0)
30
30
  self.assertAlmostEqual(v, value)
31
- self.assertEqual(e.dtype, dtype)
31
+ self.assertEqual(epsilon.dtype, dtype)
32
32
  self.assertEqual(v.dtype, dtype)
33
33
 
34
34
 
@@ -1,5 +1,6 @@
1
1
  from functools import partial
2
2
  from math import inf
3
+ from random import seed
3
4
  from unittest import main, TestCase
4
5
 
5
6
  import noregret as nr
@@ -44,12 +45,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
44
45
  target_exploitability=self.TARGET_EXPLOITABILITY,
45
46
  progress_bar=False,
46
47
  )
47
- e = game.exploitability(x_bar, y_bar)
48
+ epsilon = game.exploitability(x_bar, y_bar)
48
49
  v = game.expected_row_utility(x_bar, y_bar)
49
50
 
50
- self.assertLess(e, self.TARGET_EXPLOITABILITY)
51
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
51
52
  self.assertAlmostEqual(v, value, delta=self.DELTA)
52
- self.assertEqual(e.dtype, dtype)
53
+ self.assertEqual(epsilon.dtype, dtype)
53
54
  self.assertEqual(v.dtype, dtype)
54
55
 
55
56
  def test_last_iterate_convergence(self):
@@ -70,12 +71,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
70
71
  target_exploitability=self.TARGET_EXPLOITABILITY,
71
72
  progress_bar=False,
72
73
  )
73
- e = game.exploitability(x_bar, y_bar)
74
+ epsilon = game.exploitability(x_bar, y_bar)
74
75
  v = game.expected_row_utility(x_bar, y_bar)
75
76
 
76
- self.assertLess(e, self.TARGET_EXPLOITABILITY)
77
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
77
78
  self.assertAlmostEqual(v, value, delta=self.DELTA)
78
- self.assertEqual(e.dtype, dtype)
79
+ self.assertEqual(epsilon.dtype, dtype)
79
80
  self.assertEqual(v.dtype, dtype)
80
81
 
81
82
  def test_frequent_iterate_convergence(self):
@@ -94,12 +95,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
94
95
  target_exploitability=self.TARGET_EXPLOITABILITY,
95
96
  progress_bar=False,
96
97
  )
97
- e = game.exploitability(x_bar, y_bar)
98
+ epsilon = game.exploitability(x_bar, y_bar)
98
99
  v = game.expected_row_utility(x_bar, y_bar)
99
100
 
100
- self.assertLess(e, self.TARGET_EXPLOITABILITY)
101
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
101
102
  self.assertAlmostEqual(v, value, delta=self.DELTA)
102
- self.assertEqual(e.dtype, dtype)
103
+ self.assertEqual(epsilon.dtype, dtype)
103
104
  self.assertEqual(v.dtype, dtype)
104
105
 
105
106
 
@@ -141,12 +142,12 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
141
142
  target_exploitability=self.TARGET_EXPLOITABILITY,
142
143
  progress_bar=False,
143
144
  )
144
- e = game.exploitability(x_bar, y_bar)
145
+ epsilon = game.exploitability(x_bar, y_bar)
145
146
  v = game.expected_row_utility(x_bar, y_bar)
146
147
 
147
- self.assertLess(e, self.TARGET_EXPLOITABILITY)
148
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
148
149
  self.assertAlmostEqual(v, value, delta=self.DELTA)
149
- self.assertEqual(e.dtype, dtype)
150
+ self.assertEqual(epsilon.dtype, dtype)
150
151
  self.assertEqual(v.dtype, dtype)
151
152
 
152
153
 
@@ -172,7 +173,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
172
173
  nr.CFR(self.KER, game.column_sequence_form_polytope),
173
174
  progress_bar=False,
174
175
  )
175
- e = game.exploitability(x_bar, y_bar)
176
+ epsilon = game.exploitability(x_bar, y_bar)
176
177
  v = game.expected_row_utility(x_bar, y_bar)
177
178
  x_bar2, y_bar2 = nr.rm(
178
179
  game,
@@ -180,10 +181,10 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
180
181
  nr.CFR2(self.KER, game.column_sequence_form_polytope),
181
182
  progress_bar=False,
182
183
  )
183
- e2 = game.exploitability(x_bar2, y_bar2)
184
+ epsilon2 = game.exploitability(x_bar2, y_bar2)
184
185
  v2 = game.expected_row_utility(x_bar2, y_bar2)
185
186
 
186
- self.assertAlmostEqual(e, e2, self.PLACES)
187
+ self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
187
188
  self.assertAlmostEqual(v, v2, self.PLACES)
188
189
 
189
190
  x_bar, y_bar = nr.rm(
@@ -193,7 +194,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
193
194
  prediction=True,
194
195
  progress_bar=False,
195
196
  )
196
- e = game.exploitability(x_bar, y_bar)
197
+ epsilon = game.exploitability(x_bar, y_bar)
197
198
  v = game.expected_row_utility(x_bar, y_bar)
198
199
  x_bar2, y_bar2 = nr.rm(
199
200
  game,
@@ -202,12 +203,61 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
202
203
  prediction=True,
203
204
  progress_bar=False,
204
205
  )
205
- e2 = game.exploitability(x_bar2, y_bar2)
206
+ epsilon2 = game.exploitability(x_bar2, y_bar2)
206
207
  v2 = game.expected_row_utility(x_bar2, y_bar2)
207
208
 
208
- self.assertAlmostEqual(e, e2, self.PLACES)
209
+ self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
209
210
  self.assertAlmostEqual(v, v2, self.PLACES)
210
211
 
211
212
 
213
class StochasticRegretMinimizationTestCase(TestCase):
    """Tests for stochastic regret minimization on Kuhn poker."""
    KER = nr.FPKer()
    GAME = nr.open_spiel_game(KER, 'kuhn_poker')
    SAMPLE_COUNT = 100000
    TARGET_EXPLOITABILITY = 1e-1
    SEED = 42

    def _solve_and_measure(self, regret_minimizer):
        """Run the solver with alternation and return the exploitability.

        :param regret_minimizer: Regret minimizer.
        :return: Exploitability of the returned strategy profile.
        """
        sigma = nr.stochastic_rm(
            self.GAME,
            regret_minimizer,
            alternation=True,
            sample_count=self.SAMPLE_COUNT,
            progress_bar=False,
        )

        return self.GAME.exploitability(sigma)

    def test_external_sampling(self):
        """Without a reference strategy profile, the target is reached."""
        assert self.GAME.is_two_player and self.GAME.is_zero_sum

        # Seed before anything else so the run is reproducible.
        seed(self.SEED)

        regret_minimizer = nr.MCCFR(self.KER, self.GAME)
        epsilon = self._solve_and_measure(regret_minimizer)

        self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)

    def test_outcome_sampling(self):
        """With a uniform reference profile, the target is reached."""
        assert self.GAME.is_two_player and self.GAME.is_zero_sum

        # Seed before anything else so the run is reproducible.
        seed(self.SEED)

        reference = nr.UniformStrategyProfile(self.KER, self.GAME)
        regret_minimizer = nr.MCCFR(
            self.KER,
            self.GAME,
            reference_strategy_profile=reference,
        )
        epsilon = self._solve_and_measure(regret_minimizer)

        self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
260
+
261
+
212
262
  if __name__ == '__main__':
213
263
  main() # pragma: no cover
@@ -1,5 +1,6 @@
1
1
  """Module for utilities."""
2
2
  from importlib import import_module
3
+ from random import choices
3
4
 
4
5
 
5
6
  def import_object(object_path):
@@ -34,3 +35,16 @@ def tuple_or_none(values):
34
35
  :return: Tuple or ``None``.
35
36
  """
36
37
  return None if values is None else tuple(values)
38
+
39
+
40
def sample(values, probabilities):
    """Draw one value at random, weighted by the given probabilities.

    >>> sample(range(5), [0, 0, 1, 0, 0])
    2

    :param values: Values to be sampled from.
    :param probabilities: The weight of each value, in the same order.
    :return: The sampled value.
    """
    (drawn,) = choices(values, weights=probabilities, k=1)

    return drawn
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: noregret
3
- Version: 0.0.0.dev8
3
+ Version: 0.0.0.dev9
4
4
  Summary: No-regret learning dynamics
5
5
  Home-page: https://github.com/uoftcprg/noregret
6
6
  Author: Universal, Open, Free, and Transparent Computer Poker Research Group
@@ -31,6 +31,7 @@ noregret/regret_minimizers/__init__.py
31
31
  noregret/regret_minimizers/probability_simplices.py
32
32
  noregret/regret_minimizers/regret_minimizers.py
33
33
  noregret/regret_minimizers/sequence_form_polytopes.py
34
+ noregret/regret_minimizers/stochastic.py
34
35
  noregret/solvers/__init__.py
35
36
  noregret/solvers/linear_programming.py
36
37
  noregret/solvers/regret_minimization.py
@@ -4,7 +4,7 @@ from setuptools import find_packages, setup
4
4
 
5
5
  setup(
6
6
  name='noregret',
7
- version='0.0.0.dev8',
7
+ version='0.0.0.dev9',
8
8
  description='No-regret learning dynamics',
9
9
  long_description=open('README.rst').read(),
10
10
  long_description_content_type='text/x-rst',
File without changes
File without changes
File without changes