noregret 0.0.0.dev8__tar.gz → 0.0.0.dev10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/PKG-INFO +1 -1
  2. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/__init__.py +23 -0
  3. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/__init__.py +10 -1
  4. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/black_box.py +134 -9
  5. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/extensive_form.py +5 -5
  6. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/__init__.py +6 -0
  7. noregret-0.0.0.dev10/noregret/regret_minimizers/stochastic.py +201 -0
  8. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/solvers/__init__.py +2 -0
  9. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/solvers/regret_minimization.py +83 -22
  10. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/test_games.py +55 -0
  11. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/test_linear_programming.py +3 -3
  12. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/test_regret_minimization.py +71 -18
  13. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/PKG-INFO +1 -1
  14. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/SOURCES.txt +1 -0
  15. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/setup.py +1 -1
  16. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/LICENSE +0 -0
  17. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/README.rst +0 -0
  18. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/assurance-game.json +0 -0
  19. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/battle-of-the-sexes.json +0 -0
  20. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/chicken.json +0 -0
  21. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/gift-exchange-game.json +0 -0
  22. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/matching-pennies.json +0 -0
  23. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/prisoners-dilemma.json +0 -0
  24. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/pure-coordination.json +0 -0
  25. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-scissors-plus.json +0 -0
  26. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-scissors.json +0 -0
  27. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/rock-paper-superscissors.json +0 -0
  28. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/examples/stag-hunt.json +0 -0
  29. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/games.py +0 -0
  30. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/multilinear.py +0 -0
  31. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/games/normal_form.py +0 -0
  32. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/kernels.py +0 -0
  33. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/probability_simplices.py +0 -0
  34. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/regret_minimizers.py +0 -0
  35. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/regret_minimizers/sequence_form_polytopes.py +0 -0
  36. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/sequence_form_polytopes.py +0 -0
  37. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/solvers/linear_programming.py +0 -0
  38. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/__init__.py +0 -0
  39. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/tests/test_sequence_form_polytopes.py +0 -0
  40. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret/utilities.py +0 -0
  41. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/dependency_links.txt +0 -0
  42. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/requires.txt +0 -0
  43. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/noregret.egg-info/top_level.txt +0 -0
  44. {noregret-0.0.0.dev8 → noregret-0.0.0.dev10}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: noregret
3
- Version: 0.0.0.dev8
3
+ Version: 0.0.0.dev10
4
4
  Summary: No-regret learning dynamics
5
5
  Home-page: https://github.com/uoftcprg/noregret
6
6
  Author: Universal, Open, Free, and Transparent Computer Poker Research Group
@@ -17,7 +17,9 @@ from noregret.games import (
17
17
  RockPaperScissors,
18
18
  RockPaperScissorsPlus,
19
19
  RockPaperSuperscissors,
20
+ Simulation,
20
21
  StagHunt,
22
+ StrategyProfile,
21
23
  to_extensive_form_game,
22
24
  TwoPlayerExtensiveFormGame,
23
25
  TwoPlayerGame,
@@ -27,6 +29,7 @@ from noregret.games import (
27
29
  TwoPlayerZeroSumGame,
28
30
  TwoPlayerZeroSumMultilinearGame,
29
31
  TwoPlayerZeroSumNormalFormGame,
32
+ UniformStrategyProfile,
30
33
  )
31
34
  from noregret.kernels import (
32
35
  CUDAKernel,
@@ -46,6 +49,7 @@ from noregret.regret_minimizers import (
46
49
  EuclideanRegularization,
47
50
  FollowTheRegularizedLeader,
48
51
  MirrorDescent,
52
+ MonteCarloCounterfactualRegretMinimization,
49
53
  MultiplicativeWeightsUpdate,
50
54
  OnlineGradientDescent,
51
55
  ProbabilitySimplexRegretMinimizer,
@@ -54,12 +58,14 @@ from noregret.regret_minimizers import (
54
58
  RegretMatchingPlus,
55
59
  RegretMinimizer,
56
60
  SequenceFormPolytopeRegretMinimizer,
61
+ StochasticRegretMinimizer,
57
62
  SwapRegretMinimizer,
58
63
  )
59
64
  from noregret.sequence_form_polytopes import SequenceFormPolytope
60
65
  from noregret.solvers import (
61
66
  linear_programming,
62
67
  regret_minimization,
68
+ stochastic_regret_minimization,
63
69
  symmetric_regret_minimization,
64
70
  )
65
71
  from noregret.utilities import import_object, tuple_or_none
@@ -94,6 +100,10 @@ FTRL = FollowTheRegularizedLeader
94
100
  """Alias for :class:`noregret.FollowTheRegularizedLeader`."""
95
101
  lp = linear_programming
96
102
  """Alias for :func:`noregret.linear_programming`."""
103
+ MCCFR = MonteCarloCounterfactualRegretMinimization
104
+ """Alias for
105
+ :class:`noregret.MonteCarloCounterfactualRegretMinimization`.
106
+ """
97
107
  MD = MirrorDescent
98
108
  """Alias for :class:`noregret.MirrorDescent`."""
99
109
  MWU = MultiplicativeWeightsUpdate
@@ -112,6 +122,10 @@ RM = RegretMatching
112
122
  """Alias for :class:`noregret.RegretMatching`."""
113
123
  rm = regret_minimization
114
124
  """Alias for :func:`noregret.regret_minimization`."""
125
+ Sim = Simulation
126
+ """Alias for :class:`noregret.Simulation`."""
127
+ stochastic_rm = stochastic_regret_minimization
128
+ """Alias for :func:`noregret.stochastic_regret_minimization`."""
115
129
  symmetric_rm = symmetric_regret_minimization
116
130
  """Alias for :func:`noregret.symmetric_regret_minimization`."""
117
131
  to_efg = to_extensive_form_game
@@ -155,8 +169,10 @@ __all__ = (
155
169
  'lp',
156
170
  'MatchingPennies',
157
171
  'matrix_game',
172
+ 'MCCFR',
158
173
  'MD',
159
174
  'MirrorDescent',
175
+ 'MonteCarloCounterfactualRegretMinimization',
160
176
  'MultilinearGame',
161
177
  'MultiplicativeWeightsUpdate',
162
178
  'MWU',
@@ -184,7 +200,13 @@ __all__ = (
184
200
  'SequenceFormPolytope',
185
201
  'SequenceFormPolytopeRegretMinimizer',
186
202
  'Serializable',
203
+ 'Sim',
204
+ 'Simulation',
187
205
  'StagHunt',
206
+ 'stochastic_regret_minimization',
207
+ 'StochasticRegretMinimizer',
208
+ 'stochastic_rm',
209
+ 'StrategyProfile',
188
210
  'SwapRegretMinimizer',
189
211
  'symmetric_regret_minimization',
190
212
  'symmetric_rm',
@@ -199,4 +221,5 @@ __all__ = (
199
221
  'TwoPlayerZeroSumGame',
200
222
  'TwoPlayerZeroSumMultilinearGame',
201
223
  'TwoPlayerZeroSumNormalFormGame',
224
+ 'UniformStrategyProfile',
202
225
  )
@@ -1,5 +1,11 @@
1
1
  """Module for games."""
2
- from noregret.games.black_box import BlackBoxGame, open_spiel_game
2
+ from noregret.games.black_box import (
3
+ BlackBoxGame,
4
+ open_spiel_game,
5
+ Simulation,
6
+ StrategyProfile,
7
+ UniformStrategyProfile,
8
+ )
3
9
  from noregret.games.extensive_form import (
4
10
  ExtensiveFormGame,
5
11
  to_extensive_form_game,
@@ -48,7 +54,9 @@ __all__ = (
48
54
  'RockPaperScissors',
49
55
  'RockPaperScissorsPlus',
50
56
  'RockPaperSuperscissors',
57
+ 'Simulation',
51
58
  'StagHunt',
59
+ 'StrategyProfile',
52
60
  'to_extensive_form_game',
53
61
  'TwoPlayerExtensiveFormGame',
54
62
  'TwoPlayerGame',
@@ -58,4 +66,5 @@ __all__ = (
58
66
  'TwoPlayerZeroSumGame',
59
67
  'TwoPlayerZeroSumMultilinearGame',
60
68
  'TwoPlayerZeroSumNormalFormGame',
69
+ 'UniformStrategyProfile',
61
70
  )
@@ -2,13 +2,47 @@
2
2
  from abc import ABC, abstractmethod
3
3
  from dataclasses import dataclass, field
4
4
  from functools import partial
5
+ from typing import Any
5
6
 
6
7
  from ordered_set import OrderedSet
7
- from pyspiel import GameType, load_game
8
+ from pyspiel import exploitability, GameType, load_game
8
9
 
9
10
  from noregret.kernels import Kernel
10
11
 
11
12
 
13
+ @dataclass
14
+ class Simulation:
15
+ """Class for simulations."""
16
+ kernel: Kernel
17
+ """Kernel."""
18
+ players: list[int]
19
+ """Players."""
20
+ decision_points: list[str | None]
21
+ """Decision points."""
22
+ actions: list[str]
23
+ """Actions."""
24
+ utilities: Any
25
+ """Utilities."""
26
+
27
+ def sequences(self, player=None):
28
+ """Return sequences given an optional player.
29
+
30
+ :param player: Optional player.
31
+ :return: Sequences.
32
+ """
33
+ for i, j, a in zip(self.players, self.decision_points, self.actions):
34
+ if i is not None and (player is None or i == player):
35
+ yield j, a
36
+
37
+ def utility(self, player):
38
+ """Return the utility given a player.
39
+
40
+ :param player: Player.
41
+ :return: Utility.
42
+ """
43
+ return self.utilities[player]
44
+
45
+
12
46
  @dataclass
13
47
  class BlackBoxGame(ABC):
14
48
  """Abstract base class for black box games."""
@@ -139,6 +173,54 @@ class BlackBoxGame(ABC):
139
173
 
140
174
  return np.array(ps, dtype)
141
175
 
176
+ def exploitability(self, strategy_profile):
177
+ """Return exploitability given a strategy profile.
178
+
179
+ :param strategy_profile: Strategy profile.
180
+ :return: Exploitability.
181
+ """
182
+ if not self.is_two_player or not self.is_zero_sum:
183
+ raise ValueError('not 2p0s')
184
+
185
+ raise NotImplementedError
186
+
187
+ def simulate(self, strategy_profile):
188
+ """Run a simulation given a strategy profile.
189
+
190
+ :param strategy_profile: Strategy profile.
191
+ :return: Simulation.
192
+ """
193
+ np = self.kernel.numpy
194
+ is_ = []
195
+ js = []
196
+ as_ = []
197
+ h = self.root_node
198
+
199
+ while A := self.actions(h):
200
+ i = self.player(h)
201
+
202
+ if i is None:
203
+ j = None
204
+ ps = self.chance_probabilities(h)
205
+ else:
206
+ j = self.information_set(h)
207
+ ps = strategy_profile(h)
208
+
209
+ a = np.random.choice(A, p=ps).item()
210
+ h = self.apply(h, a)
211
+
212
+ is_.append(i)
213
+ js.append(j)
214
+ as_.append(a)
215
+
216
+ is_ = tuple(is_)
217
+ js = tuple(js)
218
+ as_ = tuple(as_)
219
+ us = self.utilities(h)
220
+ simulation = Simulation(self.kernel, is_, js, as_, us)
221
+
222
+ return simulation
223
+
142
224
 
143
225
  @dataclass
144
226
  class _OpenSpielBlackBoxGame(BlackBoxGame):
@@ -167,17 +249,14 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
167
249
  return node.child(node.string_to_action(action))
168
250
 
169
251
  def children(self, node):
170
- return list(node.child(a) for a in node.legal_actions())
252
+ return list(map(node.child, node.legal_actions()))
171
253
 
172
254
  def actions_and_children(self, node):
173
- actions = []
174
- children = []
175
-
176
- for a in node.legal_actions():
177
- actions.append(node.action_to_string(a))
178
- children.append(node.child(a))
255
+ A = node.legal_actions()
256
+ actions = OrderedSet(map(node.action_to_string, A))
257
+ children = list(map(node.child, A))
179
258
 
180
- return OrderedSet(actions), children
259
+ return actions, children
181
260
 
182
261
  def player(self, node):
183
262
  i = node.current_player()
@@ -212,6 +291,27 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
212
291
 
213
292
  return np.array([p for _, p in node.chance_outcomes()], dtype)
214
293
 
294
+ def _sigma(self, strategy_profile, h, sigma):
295
+ A = h.legal_actions()
296
+ h_primes = list(map(h.child, A))
297
+ i = self.player(h)
298
+
299
+ if A and i is not None and (j := self.information_set(h)) not in sigma:
300
+ sigma[j] = list(zip(A, strategy_profile(h).tolist()))
301
+
302
+ for h_prime in h_primes:
303
+ self._sigma(strategy_profile, h_prime, sigma)
304
+
305
+ def _sigma2(self, strategy_profile):
306
+ sigma = {}
307
+
308
+ self._sigma(strategy_profile, self.root_node, sigma)
309
+
310
+ return sigma
311
+
312
+ def exploitability(self, strategy_profile):
313
+ return exploitability(self._game, self._sigma2(strategy_profile))
314
+
215
315
 
216
316
  def open_spiel_game(kernel, game):
217
317
  """Load a game from OpenSpiel.
@@ -221,3 +321,28 @@ def open_spiel_game(kernel, game):
221
321
  :return: Game.
222
322
  """
223
323
  return _OpenSpielBlackBoxGame(kernel, game)
324
+
325
+
326
+ @dataclass
327
+ class StrategyProfile(ABC):
328
+ """Abstract base class for strategy profiles."""
329
+ kernel: Kernel
330
+ """Kernel."""
331
+ game: BlackBoxGame
332
+ """Game."""
333
+
334
+ @abstractmethod
335
+ def __call__(self, node):
336
+ pass
337
+
338
+
339
+ @dataclass
340
+ class UniformStrategyProfile(StrategyProfile):
341
+ """Class for uniform strategy profiles."""
342
+
343
+ def __call__(self, node):
344
+ np = self.kernel.numpy
345
+ dtype = self.kernel.data_type
346
+ n = len(self.game.actions(node))
347
+
348
+ return np.full(n, 1 / n, dtype)
@@ -178,9 +178,9 @@ def _nfg2efg(ker, game, decision_points='p{}'.format):
178
178
  payoffs = scipy.sparse.csr_array(payoffs)
179
179
  sfps = []
180
180
 
181
- for i, A_j in enumerate(game.actions):
181
+ for i, A in enumerate(game.actions):
182
182
  j = decision_points(i)
183
- sfp = SequenceFormPolytope(ker, {j: A_j}, {j: None})
183
+ sfp = SequenceFormPolytope(ker, {j: A}, {j: None})
184
184
 
185
185
  sfps.append(sfp)
186
186
 
@@ -198,11 +198,11 @@ def _bbg2efg(ker, game):
198
198
  raw_payoffs = defaultdict(int)
199
199
 
200
200
  def dfs(h, p, seqs, us):
201
- A_j, h_primes = game.actions_and_children(h)
201
+ A, h_primes = game.actions_and_children(h)
202
202
  i = game.player(h)
203
203
  us = us + game.utilities(h)
204
204
 
205
- if not A_j:
205
+ if not A:
206
206
  raw_payoffs[tuple(seqs)] += p * us
207
207
  elif i is None:
208
208
  p_primes = game.chance_probabilities(h)
@@ -214,7 +214,7 @@ def _bbg2efg(ker, game):
214
214
  p_j = seqs[i]
215
215
  p_js[i][j] = p_j
216
216
 
217
- for a, h_prime in zip(A_j, h_primes):
217
+ for a, h_prime in zip(A, h_primes):
218
218
  next_seqs = seqs.copy()
219
219
  next_seqs[i] = j, a
220
220
 
@@ -24,6 +24,10 @@ from noregret.regret_minimizers.sequence_form_polytopes import (
24
24
  DiscountedCounterfactualRegretMinimization,
25
25
  SequenceFormPolytopeRegretMinimizer,
26
26
  )
27
+ from noregret.regret_minimizers.stochastic import (
28
+ MonteCarloCounterfactualRegretMinimization,
29
+ StochasticRegretMinimizer,
30
+ )
27
31
 
28
32
  __all__ = (
29
33
  'BlumMansour',
@@ -36,6 +40,7 @@ __all__ = (
36
40
  'EuclideanRegularization',
37
41
  'FollowTheRegularizedLeader',
38
42
  'MirrorDescent',
43
+ 'MonteCarloCounterfactualRegretMinimization',
39
44
  'MultiplicativeWeightsUpdate',
40
45
  'OnlineGradientDescent',
41
46
  'ProbabilitySimplexRegretMinimizer',
@@ -44,5 +49,6 @@ __all__ = (
44
49
  'RegretMatchingPlus',
45
50
  'RegretMinimizer',
46
51
  'SequenceFormPolytopeRegretMinimizer',
52
+ 'StochasticRegretMinimizer',
47
53
  'SwapRegretMinimizer',
48
54
  )
@@ -0,0 +1,201 @@
1
+ """Module for stochastic regret minimizers."""
2
+ from abc import ABC
3
+ from collections.abc import Callable
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+ from noregret.games.black_box import BlackBoxGame
8
+ from noregret.kernels import Kernel
9
+ from noregret.regret_minimizers.probability_simplices import (
10
+ ProbabilitySimplexRegretMinimizer,
11
+ RegretMatching,
12
+ )
13
+
14
+
15
+ @dataclass
16
+ class StochasticRegretMinimizer(ABC):
17
+ """Abstract base class for stochastic regret minimizers."""
18
+ kernel: Kernel
19
+ """Kernel."""
20
+ game: BlackBoxGame
21
+ """Game."""
22
+ regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer]
23
+ """Regret minimizer type."""
24
+ reference_strategy_profile: Callable[[Any], Any] | None = None
25
+ """Reference strategy profile."""
26
+ sample_count: int = field(default=0, init=False)
27
+ """Number of samples."""
28
+ next_sample_count: int = field(default=0, init=False)
29
+ """Next number of samples."""
30
+ node_visit_count: int = field(default=0, init=False)
31
+ """Number of node visits."""
32
+ next_node_visit_count: int = field(default=0, init=False)
33
+ """Next number of node visits."""
34
+ regret_minimizers: dict[str, ProbabilitySimplexRegretMinimizer] = field(
35
+ default_factory=dict,
36
+ init=False,
37
+ )
38
+
39
+ def regret_minimizer(self, node):
40
+ """Return the regret minimizer for the information set of a node.
41
+
42
+ :param node: Node.
43
+ :return: Regret minimizer.
44
+ """
45
+ j = self.game.information_set(node)
46
+
47
+ if j not in self.regret_minimizers:
48
+ self.regret_minimizers[j] = self.regret_minimizer_type(
49
+ self.kernel,
50
+ len(self.game.actions(node)),
51
+ )
52
+
53
+ return self.regret_minimizers[j]
54
+
55
+ def average_action_probabilities(self, node):
56
+ """Return the average action probabilities given a node.
57
+
58
+ :param node: Node.
59
+ :return: Average action probabilities.
60
+ """
61
+ np = self.kernel.numpy
62
+ dtype = self.kernel.data_type
63
+ R = self.regret_minimizer(node)
64
+ ps = R.average_strategy
65
+
66
+ if np.isscalar(ps):
67
+ ps = np.full(R.dimension, 1 / R.dimension, dtype)
68
+
69
+ return ps
70
+
71
+ def _action_probabilities(self, h):
72
+ R = self.regret_minimizer(h)
73
+ ps = R.next_strategy
74
+
75
+ if ps is None:
76
+ ps = R.output()
77
+
78
+ return ps
79
+
80
+ def _external_sampling(self, i, us, h):
81
+ np = self.kernel.numpy
82
+ dtype = self.kernel.data_type
83
+ self.next_node_visit_count += 1
84
+ u = self.game.utility(h, i)
85
+ A = self.game.actions(h)
86
+
87
+ if A:
88
+ i_prime = self.game.player(h)
89
+
90
+ if i_prime is None:
91
+ ps = self.game.chance_probabilities(h)
92
+ else:
93
+ ps = self._action_probabilities(h)
94
+
95
+ if i_prime == i:
96
+ u_primes = []
97
+
98
+ for a in A:
99
+ h_prime = self.game.apply(h, a)
100
+
101
+ u_primes.append(self._external_sampling(i, us, h_prime))
102
+
103
+ j = self.game.information_set(h)
104
+ us[j] = np.array(u_primes, dtype)
105
+ u += us[j] @ ps
106
+ else:
107
+ a = np.random.choice(A, p=ps).item()
108
+ h_prime = self.game.apply(h, a)
109
+ u += self._external_sampling(i, us, h_prime)
110
+
111
+ return u
112
+
113
+ def _external_sampling2(self, player):
114
+ us = {}
115
+
116
+ self._external_sampling(player, us, self.game.root_node)
117
+
118
+ return us
119
+
120
+ def _outcome_sampling(self, i, us, h, p):
121
+ np = self.kernel.numpy
122
+ dtype = self.kernel.data_type
123
+ self.next_node_visit_count += 1
124
+ u = self.game.utility(h, i) / p
125
+ A = self.game.actions(h)
126
+
127
+ if A:
128
+ i_prime = self.game.player(h)
129
+
130
+ if i_prime is None:
131
+ ps = self.game.chance_probabilities(h)
132
+ elif i_prime == i:
133
+ ps = self.reference_strategy_profile(h)
134
+ else:
135
+ ps = self._action_probabilities(h)
136
+
137
+ k = np.random.choice(len(A), p=ps)
138
+ a = A[k]
139
+ h_prime = self.game.apply(h, a)
140
+ p_prime = ps[k] * p
141
+ u_prime = ps[k] * self._outcome_sampling(i, us, h_prime, p_prime)
142
+ u += u_prime
143
+
144
+ if i_prime == i:
145
+ self.regret_minimizer(h)
146
+
147
+ j = self.game.information_set(h)
148
+ us[j] = np.zeros(len(A), dtype)
149
+ us[j][k] = u_prime
150
+
151
+ return u
152
+
153
+ def _outcome_sampling2(self, player):
154
+ us = {}
155
+
156
+ self._outcome_sampling(player, us, self.game.root_node, 1)
157
+
158
+ return us
159
+
160
+ def sample(self, player):
161
+ """Sample.
162
+
163
+ :param player: Player.
164
+ :return: Utilities.
165
+ """
166
+ self.next_sample_count += 1
167
+
168
+ if self.reference_strategy_profile is None:
169
+ us = self._external_sampling2(player)
170
+ else:
171
+ us = self._outcome_sampling2(player)
172
+
173
+ return us
174
+
175
+ def observe(self, utilities):
176
+ """Observe utilities.
177
+
178
+ :param utilities: Utilities.
179
+ :return: ``None``.
180
+ """
181
+ self.sample_count = self.next_sample_count
182
+ self.node_visit_count = self.next_node_visit_count
183
+
184
+ for j, u in utilities.items():
185
+ R = self.regret_minimizers[j]
186
+
187
+ if R.next_strategy is None:
188
+ R.output()
189
+
190
+ R.observe(u)
191
+
192
+
193
+ @dataclass
194
+ class MonteCarloCounterfactualRegretMinimization(
195
+ StochasticRegretMinimizer,
196
+ ABC,
197
+ ):
198
+ """Class for Monte Carlo counterfactual regret minimization (MCCFR)."""
199
+ regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer] = (
200
+ RegretMatching
201
+ )
@@ -2,11 +2,13 @@
2
2
  from noregret.solvers.linear_programming import linear_programming
3
3
  from noregret.solvers.regret_minimization import (
4
4
  regret_minimization,
5
+ stochastic_regret_minimization,
5
6
  symmetric_regret_minimization,
6
7
  )
7
8
 
8
9
  __all__ = (
9
10
  'linear_programming',
10
11
  'regret_minimization',
12
+ 'stochastic_regret_minimization',
11
13
  'symmetric_regret_minimization',
12
14
  )
@@ -1,6 +1,6 @@
1
1
  """Module for regret minimization."""
2
2
  from collections.abc import Iterable, Mapping
3
- from itertools import count
3
+ from itertools import count, repeat
4
4
 
5
5
  from tqdm import tqdm
6
6
 
@@ -29,10 +29,6 @@ def regret_minimization(
29
29
  :param progress_bar: Whether to show a progress bar.
30
30
  :return: Average strategy profile.
31
31
  """
32
- np = game.kernel.numpy
33
-
34
- if len(regret_minimizers) != game.player_count:
35
- raise ValueError('inconsistent number of regret minimizers')
36
32
 
37
33
  def average_strategy_profile():
38
34
  average_strategy_profile = []
@@ -45,6 +41,11 @@ def regret_minimization(
45
41
  def exploitability():
46
42
  return game.exploitability(*average_strategy_profile())
47
43
 
44
+ np = game.kernel.numpy
45
+
46
+ if len(regret_minimizers) != game.player_count:
47
+ raise ValueError('inconsistent number of regret minimizers')
48
+
48
49
  if iteration_count is None or np.isposinf(iteration_count):
49
50
  iterations = count()
50
51
  else:
@@ -57,24 +58,24 @@ def regret_minimization(
57
58
  elif isinstance(progress_bar, Iterable):
58
59
  iterations = tqdm(iterations, *progress_bar)
59
60
 
60
- s = []
61
+ sigma = []
61
62
 
62
63
  for R in regret_minimizers:
63
- s.append(R.output(prediction))
64
+ sigma.append(R.output(prediction))
64
65
 
65
66
  for t in iterations:
66
67
  if alternation:
67
68
  for i, R in enumerate(regret_minimizers):
68
- R.observe(game.utility(i, *s[:i], *s[i + 1:]))
69
+ R.observe(game.utility(i, *sigma[:i], *sigma[i + 1:]))
69
70
 
70
- s[i] = R.output(prediction)
71
+ sigma[i] = R.output(prediction)
71
72
  else:
72
- U = game.utilities(*s)
73
+ us = game.utilities(*sigma)
73
74
 
74
- for i, (R, u) in enumerate(zip(regret_minimizers, U)):
75
+ for i, (R, u) in enumerate(zip(regret_minimizers, us)):
75
76
  R.observe(u)
76
77
 
77
- s[i] = R.output(prediction)
78
+ sigma[i] = R.output(prediction)
78
79
 
79
80
  if not checkpoints or t in checkpoints:
80
81
  if update is not None:
@@ -116,19 +117,18 @@ def symmetric_regret_minimization(
116
117
  :param progress_bar: Whether to show a progress bar.
117
118
  :return: Average strategy profile.
118
119
  """
119
- np = game.kernel.numpy
120
-
121
- if not game.is_symmetric:
122
- raise ValueError('game is asymmetric')
123
-
124
- R = regret_minimizer
125
120
 
126
121
  def average_strategy_profile():
127
- return [R.average_strategy] * game.player_count
122
+ return [regret_minimizer.average_strategy] * game.player_count
128
123
 
129
124
  def exploitability():
130
125
  return game.exploitability(*average_strategy_profile())
131
126
 
127
+ np = game.kernel.numpy
128
+
129
+ if not game.is_symmetric:
130
+ raise ValueError('game is asymmetric')
131
+
132
132
  if iteration_count is None or np.isposinf(iteration_count):
133
133
  iterations = count()
134
134
  else:
@@ -141,12 +141,14 @@ def symmetric_regret_minimization(
141
141
  elif isinstance(progress_bar, Iterable):
142
142
  iterations = tqdm(iterations, *progress_bar)
143
143
 
144
- s_neg_1 = [R.output(prediction)] * (game.player_count - 1)
144
+ sigma_1 = regret_minimizer.output(prediction)
145
145
 
146
146
  for t in iterations:
147
- R.observe(game.utility(0, *s_neg_1))
147
+ u = game.utility(0, *repeat(sigma_1, game.player_count - 1))
148
148
 
149
- s_neg_1 = [R.output(prediction)] * (game.player_count - 1)
149
+ regret_minimizer.observe(u)
150
+
151
+ sigma_1 = regret_minimizer.output(prediction)
150
152
 
151
153
  if not checkpoints or t in checkpoints:
152
154
  if update is not None:
@@ -164,3 +166,62 @@ def symmetric_regret_minimization(
164
166
  break
165
167
 
166
168
  return average_strategy_profile()
169
+
170
+
171
+ def stochastic_regret_minimization(
172
+ game,
173
+ regret_minimizer,
174
+ alternation=False,
175
+ sample_count=1000000,
176
+ checkpoints=(),
177
+ update=None,
178
+ progress_bar=True,
179
+ ):
180
+ """Solve a game using stochastic regret minimization.
181
+
182
+ :param game: Game.
183
+ :param regret_minimizer: Regret minimizer.
184
+ :param alternation: Whether to alternate, defaults to ``False``.
185
+ :param sample_count: Number of samples, defaults to ``1000000``.
186
+ :param checkpoints: Checkpoints.
187
+ :param update: Update.
188
+ :param progress_bar: Whether to show a progress bar.
189
+ :return: Function mapping a node to its average action probabilities.
190
+ """
191
+ np = game.kernel.numpy
192
+
193
+ if sample_count is None or np.isposinf(sample_count):
194
+ samples = count()
195
+ else:
196
+ samples = range(sample_count)
197
+
198
+ if progress_bar is True:
199
+ samples = tqdm(samples)
200
+ elif isinstance(progress_bar, Mapping):
201
+ samples = tqdm(samples, **progress_bar)
202
+ elif isinstance(progress_bar, Iterable):
203
+ samples = tqdm(samples, *progress_bar)
204
+
205
+ for s in samples:
206
+ if alternation:
207
+ for i in range(game.player_count):
208
+ regret_minimizer.observe(regret_minimizer.sample(i))
209
+ else:
210
+ uss = []
211
+
212
+ for i in range(game.player_count):
213
+ uss.append(regret_minimizer.sample(i))
214
+
215
+ for us in uss:
216
+ regret_minimizer.observe(us)
217
+
218
+ if not checkpoints or s in checkpoints:
219
+ if update is not None:
220
+ status = update()
221
+ else:
222
+ status = False
223
+
224
+ if status:
225
+ break
226
+
227
+ return regret_minimizer.average_action_probabilities
@@ -6,6 +6,7 @@ import noregret as nr
6
6
 
7
7
  class GameTestCaseMixin(ABC):
8
8
  KER = None
9
+ GAMES = None
9
10
 
10
11
  @abstractmethod
11
12
  def uniform_strategy_profile(self, game):
@@ -145,12 +146,43 @@ class ExtensiveFormGameTestCase(GameTestCaseMixin, TestCase):
145
146
  self.assertEqual(sfp.parent_sequences, sfp2.parent_sequences)
146
147
 
147
148
 
149
+ class SimulationTestCase(TestCase):
150
+ KER = nr.FPKer()
151
+
152
+ def test_sequences(self):
153
+ np = self.KER.numpy
154
+ dtype = self.KER.data_type
155
+ sim = nr.Sim(
156
+ self.KER,
157
+ (0, None, 0, 1),
158
+ ('', None, 'ab', 'b'),
159
+ ('a', 'b', 'c', 'd'),
160
+ np.array([1, -1], dtype),
161
+ )
162
+
163
+ self.assertEqual(
164
+ tuple(sim.sequences()),
165
+ (('', 'a'), ('ab', 'c'), ('b', 'd')),
166
+ )
167
+ self.assertEqual(tuple(sim.sequences(0)), (('', 'a'), ('ab', 'c')))
168
+ self.assertEqual(tuple(sim.sequences(1)), (('b', 'd'),))
169
+
170
+ def test_utility(self):
171
+ np = self.KER.numpy
172
+ dtype = self.KER.data_type
173
+ sim = nr.Sim(self.KER, (), (), (), np.array([1, -1], dtype))
174
+
175
+ self.assertEqual(sim.utility(0), 1)
176
+ self.assertEqual(sim.utility(1), -1)
177
+
178
+
148
179
  class BlackBoxGameTestCase(TestCase):
149
180
  KER = nr.FPKer()
150
181
  GAMES = (
151
182
  nr.open_spiel_game(KER, 'kuhn_poker'),
152
183
  nr.open_spiel_game(KER, 'leduc_poker'),
153
184
  )
185
+ SEED = 42
154
186
 
155
187
  def test_actions_and_children(self):
156
188
  for game in self.GAMES:
@@ -197,6 +229,29 @@ class BlackBoxGameTestCase(TestCase):
197
229
 
198
230
  np.testing.assert_equal(ps, ps2)
199
231
 
232
+ def test_exploitability(self):
233
+ for game in self.GAMES:
234
+ sigma = nr.UniformStrategyProfile(self.KER, game)
235
+ epsilon = game.exploitability(sigma)
236
+
237
+ game = nr.to_efg(self.KER, game)
238
+ sfps = game.sequence_form_polytopes
239
+ bs = [sfp.behavioral_form_uniform_strategy for sfp in sfps]
240
+ sigma = [sfp.to_sequence_form(b) for sfp, b in zip(sfps, bs)]
241
+ epsilon2 = game.exploitability(*sigma)
242
+
243
+ self.assertAlmostEqual(epsilon, epsilon2)
244
+
245
+ def test_simulation(self):
246
+ np = self.KER.numpy
247
+
248
+ for game in self.GAMES:
249
+ np.random.seed(self.SEED)
250
+
251
+ sigma = nr.UniformStrategyProfile(self.KER, game)
252
+
253
+ game.simulate(sigma)
254
+
200
255
 
201
256
  if __name__ == '__main__':
202
257
  main() # pragma: no cover
@@ -23,12 +23,12 @@ class LinearProgrammingTestCase(TestCase):
23
23
 
24
24
  for game, value in self.GAME_VALUES:
25
25
  x, y = nr.lp(game)
26
- e = game.exploitability(x, y)
26
+ epsilon = game.exploitability(x, y)
27
27
  v = game.expected_row_utility(x, y)
28
28
 
29
- self.assertAlmostEqual(e, 0)
29
+ self.assertAlmostEqual(epsilon, 0)
30
30
  self.assertAlmostEqual(v, value)
31
- self.assertEqual(e.dtype, dtype)
31
+ self.assertEqual(epsilon.dtype, dtype)
32
32
  self.assertEqual(v.dtype, dtype)
33
33
 
34
34
 
@@ -44,12 +44,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
44
44
  target_exploitability=self.TARGET_EXPLOITABILITY,
45
45
  progress_bar=False,
46
46
  )
47
- e = game.exploitability(x_bar, y_bar)
47
+ epsilon = game.exploitability(x_bar, y_bar)
48
48
  v = game.expected_row_utility(x_bar, y_bar)
49
49
 
50
- self.assertLess(e, self.TARGET_EXPLOITABILITY)
50
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
51
51
  self.assertAlmostEqual(v, value, delta=self.DELTA)
52
- self.assertEqual(e.dtype, dtype)
52
+ self.assertEqual(epsilon.dtype, dtype)
53
53
  self.assertEqual(v.dtype, dtype)
54
54
 
55
55
  def test_last_iterate_convergence(self):
@@ -70,12 +70,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
70
70
  target_exploitability=self.TARGET_EXPLOITABILITY,
71
71
  progress_bar=False,
72
72
  )
73
- e = game.exploitability(x_bar, y_bar)
73
+ epsilon = game.exploitability(x_bar, y_bar)
74
74
  v = game.expected_row_utility(x_bar, y_bar)
75
75
 
76
- self.assertLess(e, self.TARGET_EXPLOITABILITY)
76
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
77
77
  self.assertAlmostEqual(v, value, delta=self.DELTA)
78
- self.assertEqual(e.dtype, dtype)
78
+ self.assertEqual(epsilon.dtype, dtype)
79
79
  self.assertEqual(v.dtype, dtype)
80
80
 
81
81
  def test_frequent_iterate_convergence(self):
@@ -94,12 +94,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
94
94
  target_exploitability=self.TARGET_EXPLOITABILITY,
95
95
  progress_bar=False,
96
96
  )
97
- e = game.exploitability(x_bar, y_bar)
97
+ epsilon = game.exploitability(x_bar, y_bar)
98
98
  v = game.expected_row_utility(x_bar, y_bar)
99
99
 
100
- self.assertLess(e, self.TARGET_EXPLOITABILITY)
100
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
101
101
  self.assertAlmostEqual(v, value, delta=self.DELTA)
102
- self.assertEqual(e.dtype, dtype)
102
+ self.assertEqual(epsilon.dtype, dtype)
103
103
  self.assertEqual(v.dtype, dtype)
104
104
 
105
105
 
@@ -141,12 +141,12 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
141
141
  target_exploitability=self.TARGET_EXPLOITABILITY,
142
142
  progress_bar=False,
143
143
  )
144
- e = game.exploitability(x_bar, y_bar)
144
+ epsilon = game.exploitability(x_bar, y_bar)
145
145
  v = game.expected_row_utility(x_bar, y_bar)
146
146
 
147
- self.assertLess(e, self.TARGET_EXPLOITABILITY)
147
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
148
148
  self.assertAlmostEqual(v, value, delta=self.DELTA)
149
- self.assertEqual(e.dtype, dtype)
149
+ self.assertEqual(epsilon.dtype, dtype)
150
150
  self.assertEqual(v.dtype, dtype)
151
151
 
152
152
 
@@ -172,7 +172,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
172
172
  nr.CFR(self.KER, game.column_sequence_form_polytope),
173
173
  progress_bar=False,
174
174
  )
175
- e = game.exploitability(x_bar, y_bar)
175
+ epsilon = game.exploitability(x_bar, y_bar)
176
176
  v = game.expected_row_utility(x_bar, y_bar)
177
177
  x_bar2, y_bar2 = nr.rm(
178
178
  game,
@@ -180,10 +180,10 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
180
180
  nr.CFR2(self.KER, game.column_sequence_form_polytope),
181
181
  progress_bar=False,
182
182
  )
183
- e2 = game.exploitability(x_bar2, y_bar2)
183
+ epsilon2 = game.exploitability(x_bar2, y_bar2)
184
184
  v2 = game.expected_row_utility(x_bar2, y_bar2)
185
185
 
186
- self.assertAlmostEqual(e, e2, self.PLACES)
186
+ self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
187
187
  self.assertAlmostEqual(v, v2, self.PLACES)
188
188
 
189
189
  x_bar, y_bar = nr.rm(
@@ -193,7 +193,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
193
193
  prediction=True,
194
194
  progress_bar=False,
195
195
  )
196
- e = game.exploitability(x_bar, y_bar)
196
+ epsilon = game.exploitability(x_bar, y_bar)
197
197
  v = game.expected_row_utility(x_bar, y_bar)
198
198
  x_bar2, y_bar2 = nr.rm(
199
199
  game,
@@ -202,12 +202,65 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
202
202
  prediction=True,
203
203
  progress_bar=False,
204
204
  )
205
- e2 = game.exploitability(x_bar2, y_bar2)
205
+ epsilon2 = game.exploitability(x_bar2, y_bar2)
206
206
  v2 = game.expected_row_utility(x_bar2, y_bar2)
207
207
 
208
- self.assertAlmostEqual(e, e2, self.PLACES)
208
+ self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
209
209
  self.assertAlmostEqual(v, v2, self.PLACES)
210
210
 
211
211
 
212
+ class StochasticRegretMinimizationTestCase(TestCase):
213
+ KER = nr.FPKer()
214
+ GAME = nr.open_spiel_game(KER, 'kuhn_poker')
215
+ SAMPLE_COUNT = 100000
216
+ TARGET_EXPLOITABILITY = 1e-1
217
+ SEED = 42
218
+
219
+ def test_external_sampling(self):
220
+ np = self.KER.numpy
221
+
222
+ assert self.GAME.is_two_player and self.GAME.is_zero_sum
223
+
224
+ np.random.seed(self.SEED)
225
+
226
+ R = nr.MCCFR(self.KER, self.GAME)
227
+ sigma = nr.stochastic_rm(
228
+ self.GAME,
229
+ R,
230
+ alternation=True,
231
+ sample_count=self.SAMPLE_COUNT,
232
+ progress_bar=False,
233
+ )
234
+ epsilon = self.GAME.exploitability(sigma)
235
+
236
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
237
+
238
+ def test_outcome_sampling(self):
239
+ np = self.KER.numpy
240
+
241
+ assert self.GAME.is_two_player and self.GAME.is_zero_sum
242
+
243
+ np.random.seed(self.SEED)
244
+
245
+ R = nr.MCCFR(
246
+ self.KER,
247
+ self.GAME,
248
+ reference_strategy_profile=nr.UniformStrategyProfile(
249
+ self.KER,
250
+ self.GAME,
251
+ ),
252
+ )
253
+ sigma = nr.stochastic_rm(
254
+ self.GAME,
255
+ R,
256
+ alternation=True,
257
+ sample_count=self.SAMPLE_COUNT,
258
+ progress_bar=False,
259
+ )
260
+ epsilon = self.GAME.exploitability(sigma)
261
+
262
+ self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
263
+
264
+
212
265
  if __name__ == '__main__':
213
266
  main() # pragma: no cover
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: noregret
3
- Version: 0.0.0.dev8
3
+ Version: 0.0.0.dev10
4
4
  Summary: No-regret learning dynamics
5
5
  Home-page: https://github.com/uoftcprg/noregret
6
6
  Author: Universal, Open, Free, and Transparent Computer Poker Research Group
@@ -31,6 +31,7 @@ noregret/regret_minimizers/__init__.py
31
31
  noregret/regret_minimizers/probability_simplices.py
32
32
  noregret/regret_minimizers/regret_minimizers.py
33
33
  noregret/regret_minimizers/sequence_form_polytopes.py
34
+ noregret/regret_minimizers/stochastic.py
34
35
  noregret/solvers/__init__.py
35
36
  noregret/solvers/linear_programming.py
36
37
  noregret/solvers/regret_minimization.py
@@ -4,7 +4,7 @@ from setuptools import find_packages, setup
4
4
 
5
5
  setup(
6
6
  name='noregret',
7
- version='0.0.0.dev8',
7
+ version='0.0.0.dev10',
8
8
  description='No-regret learning dynamics',
9
9
  long_description=open('README.rst').read(),
10
10
  long_description_content_type='text/x-rst',
File without changes
File without changes
File without changes