noregret 0.0.0.dev4__tar.gz → 0.0.0.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/PKG-INFO +6 -57
  2. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/README.rst +5 -56
  3. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/__init__.py +6 -0
  4. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/__init__.py +3 -1
  5. noregret-0.0.0.dev5/noregret/games/black_box.py +200 -0
  6. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/games.py +1 -0
  7. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/multilinear.py +2 -0
  8. noregret-0.0.0.dev5/noregret/games/utilities.py +140 -0
  9. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/kernels.py +0 -1
  10. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/__init__.py +2 -0
  11. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/probability_simplices.py +2 -0
  12. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/regret_minimizers.py +7 -2
  13. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/regret_minimizers/sequence_form_polytopes.py +101 -12
  14. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/sequence_form_polytopes.py +3 -3
  15. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/solvers/regret_minimization.py +1 -1
  16. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/test_games.py +2 -2
  17. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/test_linear_programming.py +2 -2
  18. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/test_regret_minimization.py +40 -3
  19. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/PKG-INFO +6 -57
  20. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/SOURCES.txt +1 -0
  21. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/setup.py +1 -1
  22. noregret-0.0.0.dev4/noregret/games/utilities.py +0 -141
  23. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/LICENSE +0 -0
  24. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/extensive_form/__init__.py +0 -0
  25. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/extensive_form/games.py +0 -0
  26. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/__init__.py +0 -0
  27. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/assurance-game.json +0 -0
  28. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/battle-of-the-sexes.json +0 -0
  29. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/chicken.json +0 -0
  30. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/games.py +0 -0
  31. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/gift-exchange-game.json +0 -0
  32. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/matching-pennies.json +0 -0
  33. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/prisoners-dilemma.json +0 -0
  34. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/pure-coordination.json +0 -0
  35. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-scissors-plus.json +0 -0
  36. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-scissors.json +0 -0
  37. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/rock-paper-superscissors.json +0 -0
  38. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/games/normal_form/stag-hunt.json +0 -0
  39. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/solvers/__init__.py +0 -0
  40. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/solvers/linear_programming.py +0 -0
  41. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/__init__.py +0 -0
  42. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/tests/test_sequence_form_polytopes.py +0 -0
  43. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret/utilities.py +0 -0
  44. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/dependency_links.txt +0 -0
  45. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/requires.txt +0 -0
  46. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/noregret.egg-info/top_level.txt +0 -0
  47. {noregret-0.0.0.dev4 → noregret-0.0.0.dev5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: noregret
3
- Version: 0.0.0.dev4
3
+ Version: 0.0.0.dev5
4
4
  Summary: No-regret learning dynamics
5
5
  Home-page: https://github.com/uoftcprg/noregret
6
6
  Author: Universal, Open, Free, and Transparent Computer Poker Research Group
@@ -94,8 +94,8 @@ The code snippet below demonstrates how one can solve games via regret minimizat
94
94
  KERNEL = nr.FloatingPointKernel()
95
95
  GAMES = {
96
96
  'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
97
- 'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
98
- 'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
97
+ 'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
98
+ 'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
99
99
  }
100
100
  PARAMETERS = {
101
101
  'CFR': (nr.CFR, False, False),
@@ -180,7 +180,7 @@ The code snippet below demonstrates how one can solve games while leveraging GPU
180
180
  import noregret as nr
181
181
 
182
182
  KERNEL = nr.CUDAKernel()
183
- GAME = nr.from_open_spiel(KERNEL, 'liars_dice')
183
+ GAME = nr.to_efg(KERNEL, nr.from_open_spiel('liars_dice'))
184
184
  PARAMETERS = nr.CFR, True, False
185
185
 
186
186
 
@@ -220,8 +220,8 @@ The code snippet below demonstrates how one can solve games via linear programmi
220
220
  KERNEL = nr.FloatingPointKernel()
221
221
  GAMES = {
222
222
  'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
223
- 'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
224
- 'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
223
+ 'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
224
+ 'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
225
225
  }
226
226
 
227
227
 
@@ -236,57 +236,6 @@ The code snippet below demonstrates how one can solve games via linear programmi
236
236
  if __name__ == '__main__':
237
237
  main()
238
238
 
239
- Conduct Research in Online Convex Optimization
240
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
241
-
242
- The code snippet below reproduces Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
243
-
244
- .. code-block:: python
245
-
246
- from functools import partial
247
-
248
- import matplotlib.pyplot as plt
249
- import noregret as nr
250
-
251
- KERNEL = nr.FloatingPointKernel()
252
- GAME = nr.RockPaperScissorsPlus(KERNEL)
253
- R_type = partial(nr.MWU, learning_rate=1e-3)
254
-
255
-
256
- def main():
257
- RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
258
- BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
259
-
260
- nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
261
- nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
262
- x, _ = nr.linear_programming(GAME)
263
-
264
- strategies = KERNEL.numpy.array(RM.strategies)
265
-
266
- plt.clf()
267
- plt.plot(strategies[:, 0], strategies[:, 1])
268
- plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
269
- plt.plot(*x[:2], 'ro')
270
- plt.xlabel('Probability of action 1')
271
- plt.ylabel('Probability of action 2')
272
- plt.title('No-external regret dynamics')
273
- plt.show()
274
-
275
- strategies = KERNEL.numpy.array(BM_RM.strategies)
276
-
277
- plt.clf()
278
- plt.plot(strategies[:, 0], strategies[:, 1])
279
- plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
280
- plt.plot(*x[:2], 'ro')
281
- plt.xlabel('Probability of action 1')
282
- plt.ylabel('Probability of action 2')
283
- plt.title('No-swap regret dynamics')
284
- plt.show()
285
-
286
-
287
- if __name__ == '__main__':
288
- main()
289
-
290
239
  Testing and Validation
291
240
  ----------------------
292
241
 
@@ -44,8 +44,8 @@ The code snippet below demonstrates how one can solve games via regret minimizat
44
44
  KERNEL = nr.FloatingPointKernel()
45
45
  GAMES = {
46
46
  'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
47
- 'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
48
- 'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
47
+ 'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
48
+ 'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
49
49
  }
50
50
  PARAMETERS = {
51
51
  'CFR': (nr.CFR, False, False),
@@ -130,7 +130,7 @@ The code snippet below demonstrates how one can solve games while leveraging GPU
130
130
  import noregret as nr
131
131
 
132
132
  KERNEL = nr.CUDAKernel()
133
- GAME = nr.from_open_spiel(KERNEL, 'liars_dice')
133
+ GAME = nr.to_efg(KERNEL, nr.from_open_spiel('liars_dice'))
134
134
  PARAMETERS = nr.CFR, True, False
135
135
 
136
136
 
@@ -170,8 +170,8 @@ The code snippet below demonstrates how one can solve games via linear programmi
170
170
  KERNEL = nr.FloatingPointKernel()
171
171
  GAMES = {
172
172
  'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
173
- 'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
174
- 'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
173
+ 'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
174
+ 'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
175
175
  }
176
176
 
177
177
 
@@ -186,57 +186,6 @@ The code snippet below demonstrates how one can solve games via linear programmi
186
186
  if __name__ == '__main__':
187
187
  main()
188
188
 
189
- Conduct Research in Online Convex Optimization
190
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
191
-
192
- The code snippet below reproduces Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
193
-
194
- .. code-block:: python
195
-
196
- from functools import partial
197
-
198
- import matplotlib.pyplot as plt
199
- import noregret as nr
200
-
201
- KERNEL = nr.FloatingPointKernel()
202
- GAME = nr.RockPaperScissorsPlus(KERNEL)
203
- R_type = partial(nr.MWU, learning_rate=1e-3)
204
-
205
-
206
- def main():
207
- RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
208
- BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
209
-
210
- nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
211
- nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
212
- x, _ = nr.linear_programming(GAME)
213
-
214
- strategies = KERNEL.numpy.array(RM.strategies)
215
-
216
- plt.clf()
217
- plt.plot(strategies[:, 0], strategies[:, 1])
218
- plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
219
- plt.plot(*x[:2], 'ro')
220
- plt.xlabel('Probability of action 1')
221
- plt.ylabel('Probability of action 2')
222
- plt.title('No-external regret dynamics')
223
- plt.show()
224
-
225
- strategies = KERNEL.numpy.array(BM_RM.strategies)
226
-
227
- plt.clf()
228
- plt.plot(strategies[:, 0], strategies[:, 1])
229
- plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
230
- plt.plot(*x[:2], 'ro')
231
- plt.xlabel('Probability of action 1')
232
- plt.ylabel('Probability of action 2')
233
- plt.title('No-swap regret dynamics')
234
- plt.show()
235
-
236
-
237
- if __name__ == '__main__':
238
- main()
239
-
240
189
  Testing and Validation
241
190
  ----------------------
242
191
 
@@ -2,6 +2,7 @@
2
2
  from noregret.games import (
3
3
  AssuranceGame,
4
4
  BattleOfTheSexes,
5
+ BlackBoxGame,
5
6
  Chicken,
6
7
  ExtensiveFormGame,
7
8
  from_open_spiel,
@@ -36,6 +37,7 @@ from noregret.kernels import (
36
37
  from noregret.regret_minimizers import (
37
38
  BlumMansour,
38
39
  CounterfactualRegretMinimization,
40
+ CounterfactualRegretMinimization2,
39
41
  CounterfactualRegretMinimizationPlus,
40
42
  DiscountedCounterfactualRegretMinimization,
41
43
  DiscountedRegretMatching,
@@ -65,6 +67,8 @@ BM = BlumMansour
65
67
  """Alias for :class:`noregret.BlumMansour`."""
66
68
  CFR = CounterfactualRegretMinimization
67
69
  """Alias for :class:`noregret.CounterfactualRegretMinimization`."""
70
+ CFR2 = CounterfactualRegretMinimization2
71
+ """Alias for :class:`noregret.CounterfactualRegretMinimization2`."""
68
72
  CFR_plus = CounterfactualRegretMinimizationPlus
69
73
  """Alias for :class:`noregret.CounterfactualRegretMinimizationPlus`."""
70
74
  DCFR = DiscountedCounterfactualRegretMinimization
@@ -111,12 +115,14 @@ to_efg = to_extensive_form
111
115
  __all__ = (
112
116
  'AssuranceGame',
113
117
  'BattleOfTheSexes',
118
+ 'BlackBoxGame',
114
119
  'BlumMansour',
115
120
  'BM',
116
121
  'CFR',
117
122
  'CFR_plus',
118
123
  'Chicken',
119
124
  'CounterfactualRegretMinimization',
125
+ 'CounterfactualRegretMinimization2',
120
126
  'CounterfactualRegretMinimizationPlus',
121
127
  'CUDAKernel',
122
128
  'DCFR',
@@ -1,4 +1,5 @@
1
1
  """Module for games."""
2
+ from noregret.games.black_box import BlackBoxGame, from_open_spiel
2
3
  from noregret.games.extensive_form import (
3
4
  ExtensiveFormGame,
4
5
  TwoPlayerExtensiveFormGame,
@@ -26,11 +27,12 @@ from noregret.games.normal_form import (
26
27
  TwoPlayerNormalFormGame,
27
28
  TwoPlayerZeroSumNormalFormGame,
28
29
  )
29
- from noregret.games.utilities import from_open_spiel, to_extensive_form
30
+ from noregret.games.utilities import to_extensive_form
30
31
 
31
32
  __all__ = (
32
33
  'AssuranceGame',
33
34
  'BattleOfTheSexes',
35
+ 'BlackBoxGame',
34
36
  'Chicken',
35
37
  'ExtensiveFormGame',
36
38
  'from_open_spiel',
@@ -0,0 +1,200 @@
1
+ """Module for black box games."""
2
+ from abc import ABC, abstractmethod
3
+ from dataclasses import dataclass, field
4
+ from functools import partial
5
+
6
+ from ordered_set import OrderedSet
7
+ from pyspiel import GameType, load_game
8
+
9
+
10
@dataclass
class BlackBoxGame(ABC):
    """Abstract base class for black box games.

    A black box game exposes its game tree only through queries on
    nodes: the available actions, the child reached by an action, the
    acting player, utilities, information sets, and chance
    probabilities.
    """

    @property
    @abstractmethod
    def player_count(self):
        """Return the number of players.

        :return: Number of players.
        """

    @property
    def is_two_player(self):
        """Return whether the game is two-player.

        :return: Whether the game is two-player.
        """
        return self.player_count == 2

    @property
    @abstractmethod
    def is_zero_sum(self):
        """Return whether the game is zero-sum.

        :return: Whether the game is zero-sum.
        """

    @property
    @abstractmethod
    def root_node(self):
        """Return the root node.

        :return: Root node.
        """

    @abstractmethod
    def actions(self, node):
        """Return the actions given a node.

        :param node: Node.
        :return: Actions.
        """

    @abstractmethod
    def apply(self, node, action):
        """Return the child node given a node and an action.

        :param node: Node.
        :param action: Action.
        :return: Child node.
        """

    def children(self, node):
        """Return the children given a node.

        :param node: Node.
        :return: Children, in the order of :meth:`actions`.
        """
        return list(map(partial(self.apply, node), self.actions(node)))

    def actions_and_children(self, node):
        """Return the actions and children given a node.

        :param node: Node.
        :return: Actions and children, in matching order.
        """
        A = self.actions(node)

        return A, list(map(partial(self.apply, node), A))

    @abstractmethod
    def player(self, node):
        """Return the player given a node.

        :param node: Node.
        :return: Player.
        """

    @abstractmethod
    def utility(self, player, node):
        """Return the utility given a player and a node.

        :param player: Player.
        :param node: Node.
        :return: Utility.
        """

    def utilities(self, node):
        """Return the utilities given a node.

        :param node: Node.
        :return: Utilities, one per player, in player order.
        """
        # A comprehension is required here: ``map`` called with a single
        # generator argument raises ``TypeError``.
        return [self.utility(i, node) for i in range(self.player_count)]

    @abstractmethod
    def information_set(self, node):
        """Return the information set given a node.

        :param node: Node.
        :return: Information set.
        """

    @abstractmethod
    def chance_probability(self, node, action):
        """Return the chance probability given a node and an action.

        :param node: Node.
        :param action: Action.
        :return: Chance probability.
        """

    def chance_probabilities(self, node):
        """Return the chance probabilities given a node.

        :param node: Node.
        :return: Chance probabilities, in the order of :meth:`actions`.
        """
        # Same fix as in ``utilities``: build the list directly instead
        # of passing a lone generator to ``map``.
        return [self.chance_probability(node, a) for a in self.actions(node)]
132
+
133
+
134
@dataclass
class _OpenSpielBlackBoxGame(BlackBoxGame):
    """Black box game backed by a game loaded from OpenSpiel.

    Nodes are OpenSpiel states and actions are their string
    representations, as produced by the state's ``action_to_string``.
    """
    game: str
    """Name of the game passed to OpenSpiel's ``load_game``."""
    # The loaded OpenSpiel game object (not a string); populated in
    # ``__post_init__``.
    _game: object = field(init=False)

    def __post_init__(self):
        self._game = load_game(self.game)

    @property
    def player_count(self):
        return self._game.num_players()

    @property
    def is_zero_sum(self):
        return self._game.get_type().utility == GameType.Utility.ZERO_SUM

    @property
    def root_node(self):
        return self._game.new_initial_state()

    def actions(self, node):
        return OrderedSet(map(node.action_to_string, node.legal_actions()))

    def apply(self, node, action):
        return node.child(node.string_to_action(action))

    def children(self, node):
        # Works on raw action ids directly, skipping the string round trip
        # of the base-class implementation.
        return [node.child(a) for a in node.legal_actions()]

    def actions_and_children(self, node):
        actions = []
        children = []

        for a in node.legal_actions():
            actions.append(node.action_to_string(a))
            children.append(node.child(a))

        return actions, children

    def player(self, node):
        i = node.current_player()

        # -1 is reported as "no player" (``None``); callers treat such
        # nodes as chance nodes.
        return None if i == -1 else i

    def utility(self, player, node):
        return node.player_reward(player)

    def utilities(self, node):
        return node.rewards()

    def information_set(self, node):
        return node.information_state_string()

    def chance_probability(self, node, action):
        # Match each outcome by its action string rather than by position
        # in the (de-duplicated) legal-action set, so the result does not
        # depend on the two orderings lining up.
        for a, p in node.chance_outcomes():
            if node.action_to_string(a) == action:
                return p

        raise KeyError(action)

    def chance_probabilities(self, node):
        return [p for _, p in node.chance_outcomes()]
192
+
193
+
194
def from_open_spiel(game):
    """Load a game from OpenSpiel as a black box game.

    The result is not yet an extensive-form game; pass it, together with
    a kernel, to the extensive-form conversion to obtain one.

    :param game: Name of the game in OpenSpiel.
    :return: Black box game.
    """
    black_box_game = _OpenSpielBlackBoxGame(game)

    return black_box_game
@@ -22,6 +22,7 @@ class Game(ABC):
22
22
  :return: Number of players.
23
23
  """
24
24
 
25
+ @property
25
26
  @abstractmethod
26
27
  def is_symmetric(self):
27
28
  """Return whether the game is symmetric.
@@ -39,6 +39,7 @@ class MultilinearGame(Game, ABC):
39
39
  """
40
40
  return tuple(self.dimension(i) for i in range(self.player_count))
41
41
 
42
+ @property
42
43
  def is_symmetric(self):
43
44
  raise NotImplementedError
44
45
 
@@ -100,6 +101,7 @@ class TwoPlayerMultilinearGame(TwoPlayerGame, MultilinearGame, ABC):
100
101
  """
101
102
  return self.payoffs[1]
102
103
 
104
+ @property
103
105
  def is_symmetric(self):
104
106
  np = self.kernel.numpy
105
107
 
@@ -0,0 +1,140 @@
1
+ from collections import defaultdict
2
+ from functools import partial, singledispatch
3
+ from itertools import starmap
4
+
5
+ from ordered_set import OrderedSet
6
+ from scipy.sparse import lil_array
7
+
8
+ from noregret.games.black_box import BlackBoxGame
9
+ from noregret.games.extensive_form.games import (
10
+ ExtensiveFormGame,
11
+ TwoPlayerExtensiveFormGame,
12
+ TwoPlayerZeroSumExtensiveFormGame,
13
+ )
14
+ from noregret.games.games import Game
15
+ from noregret.games.normal_form.games import (
16
+ NormalFormGame,
17
+ TwoPlayerNormalFormGame,
18
+ TwoPlayerZeroSumNormalFormGame,
19
+ )
20
+ from noregret.sequence_form_polytopes import SequenceFormPolytope
21
+
22
+
23
def _nfg2efg(kernel, game, decision_points=str):
    """Convert a normal-form game to an extensive-form game.

    Each player gets a single decision point whose actions are the
    player's normal-form actions. The payoff array is padded with a
    leading zero slot along every player axis, and the original payoffs
    are copied into the remaining entries.

    :param kernel: Kernel.
    :param game: Normal-form game.
    :param decision_points: Callable mapping a player index to the name
                            of that player's decision point.
    :return: Extensive-form game.
    """
    np = kernel.numpy
    dtype = kernel.data_type
    is_zero_sum = isinstance(game, TwoPlayerZeroSumNormalFormGame)

    if is_zero_sum:
        type_ = TwoPlayerZeroSumExtensiveFormGame
    elif isinstance(game, TwoPlayerNormalFormGame):
        type_ = TwoPlayerExtensiveFormGame
    else:
        type_ = ExtensiveFormGame

    padded_shape = tuple(n + 1 for n in game.dimensions)
    interior = tuple(slice(1, None) for _ in padded_shape)

    if is_zero_sum:
        padded = np.zeros(padded_shape, dtype)
        padded[interior] = game.payoffs
    else:
        # One payoff array per player, stacked along the leading axis.
        # NOTE(review): this array has more than two axes before the CSR
        # conversion below -- confirm the kernel's sparse type accepts it.
        padded = np.zeros((game.player_count, *padded_shape), dtype)
        padded[(slice(None), *interior)] = game.payoffs

    payoffs = kernel.scipy.sparse.csr_array(padded)
    polytopes = []

    for player, player_actions in enumerate(game.actions):
        name = decision_points(player)

        polytopes.append(
            SequenceFormPolytope(kernel, {name: player_actions}, {name: None}),
        )

    return type_(kernel, payoffs, tuple(polytopes))
56
+
57
+
58
def _bbg2efg(kernel, game):
    """Convert a black box game to an extensive-form game.

    The game tree is traversed depth-first from the root node. Along the
    way, the actions and parent sequence of every information set are
    recorded per player, and the chance-weighted accumulated utilities
    of each terminal node are added up per tuple of last sequences.

    :param kernel: Kernel.
    :param game: Black box game.
    :return: Two-player zero-sum extensive-form game.
    :raises NotImplementedError: If the game is not two-player zero-sum.
    """
    # Only the two-player zero-sum conversion is implemented. Check this
    # before the (potentially very large) traversal so that unsupported
    # games fail immediately instead of after walking the whole tree.
    if not (game.is_two_player and game.is_zero_sum):
        raise NotImplementedError(
            'only two-player zero-sum black box games are supported',
        )

    scipy = kernel.scipy
    dtype = kernel.data_type
    P = range(game.player_count)
    A_js = [defaultdict(OrderedSet) for _ in P]  # actions per info set
    p_js = [{} for _ in P]  # parent sequence per info set
    raw_payoffs = [defaultdict(int) for _ in P]

    def dfs(h, p, seqs, us):
        # h: node, p: chance reach probability, seqs: last sequence of
        # each player, us: utilities accumulated along the path.
        A_j, h_primes = game.actions_and_children(h)
        i = game.player(h)
        us = us.copy()

        for i_prime, v in enumerate(game.utilities(h)):
            us[i_prime] += v

        if not A_j:
            # No actions: terminal node.
            seqs = tuple(seqs)

            for i_prime, u in enumerate(us):
                raw_payoffs[i_prime][seqs] += p * u
        elif i is None:
            # No acting player: chance node.
            p_primes = game.chance_probabilities(h)

            for h_prime, p_prime in zip(h_primes, p_primes):
                dfs(h_prime, p_prime * p, seqs, us)
        else:
            j = game.information_set(h)
            p_js[i][j] = seqs[i]

            for a, h_prime in zip(A_j, h_primes):
                next_seqs = seqs.copy()
                next_seqs[i] = j, a

                A_js[i][j].add(a)
                dfs(h_prime, p, next_seqs, us)

    dfs(game.root_node, 1, [None for _ in P], [0 for _ in P])

    SFP = partial(SequenceFormPolytope, kernel)
    sfps = tuple(starmap(SFP, zip(A_js, p_js)))
    dimensions = tuple(sfp.column_count for sfp in sfps)
    payoffs = lil_array(dimensions, dtype=dtype)

    # The zero-sum payoff matrix holds the first player's payoffs.
    for seqs, u in raw_payoffs[0].items():
        indices = tuple(sfp.column(seq) for sfp, seq in zip(sfps, seqs))
        payoffs[indices] = u

    payoffs = scipy.sparse.csr_array(payoffs)

    return TwoPlayerZeroSumExtensiveFormGame(kernel, payoffs, sfps)
119
+
120
+
121
@singledispatch
def to_extensive_form(kernel, game):
    """Convert a given game to an extensive-form game.

    Dispatch is on the type of the first positional argument. When a
    game is passed as the only argument, the overload registered below
    forwards to this implementation with that game's own kernel.

    :param kernel: Kernel.
    :param game: Game.
    :return: Extensive-form game.
    :raises ValueError: If the game is neither a normal-form game nor a
                        black box game.
    """
    if isinstance(game, NormalFormGame):
        return _nfg2efg(kernel, game)

    if isinstance(game, BlackBoxGame):
        return _bbg2efg(kernel, game)

    raise ValueError('unknown game')


@to_extensive_form.register
def _(game: Game):
    # Single-argument form: reuse the kernel the game already carries.
    kernel = game.kernel

    return to_extensive_form(kernel, game)
@@ -9,7 +9,6 @@ from noregret.utilities import import_object
9
9
  @dataclass(repr=False)
10
10
  class Kernel(ABC):
11
11
  """Abstract base class for kernels."""
12
-
13
12
  data_type: Any = float
14
13
  """Data type."""
15
14
  index_type: Any = int
@@ -19,6 +19,7 @@ from noregret.regret_minimizers.probability_simplices import (
19
19
  )
20
20
  from noregret.regret_minimizers.sequence_form_polytopes import (
21
21
  CounterfactualRegretMinimization,
22
+ CounterfactualRegretMinimization2,
22
23
  CounterfactualRegretMinimizationPlus,
23
24
  DiscountedCounterfactualRegretMinimization,
24
25
  SequenceFormPolytopeRegretMinimizer,
@@ -27,6 +28,7 @@ from noregret.regret_minimizers.sequence_form_polytopes import (
27
28
  __all__ = (
28
29
  'BlumMansour',
29
30
  'CounterfactualRegretMinimization',
31
+ 'CounterfactualRegretMinimization2',
30
32
  'CounterfactualRegretMinimizationPlus',
31
33
  'DiscountedCounterfactualRegretMinimization',
32
34
  'DiscountedRegretMatching',
@@ -270,6 +270,8 @@ class BlumMansour(ProbabilitySimplexSwapRegretMinimizer):
270
270
  """External regret minimizers."""
271
271
 
272
272
  def __post_init__(self):
273
+ super().__post_init__()
274
+
273
275
  n = self.dimension
274
276
  R_type = partial(self.regret_minimizer_type, self.kernel)
275
277
  self.external_regret_minimizers = tuple(map(R_type, repeat(n, n)))
@@ -38,13 +38,18 @@ class RegretMinimizer(ABC):
38
38
  """Strategies."""
39
39
  utilities: list[Any] = field(default_factory=list)
40
40
  """Utilities."""
41
- _next_strategy: Any = None
41
+ dimension: int = field(init=False)
42
+ """Dimension."""
43
+ _next_strategy: Any = field(default=None, init=False)
44
+
45
+ def __post_init__(self):
46
+ pass
42
47
 
43
48
  @property
44
49
  def next_strategy(self):
45
50
  """Return the next strategy.
46
51
 
47
- :return: The next strategy.
52
+ :return: Next strategy.
48
53
  """
49
54
  return self._next_strategy
50
55
 
@@ -5,14 +5,14 @@ from typing import Any
5
5
  from abc import ABC
6
6
 
7
7
  from noregret.regret_minimizers.probability_simplices import (
8
- DiscountedRegretMatching,
8
+ ProbabilitySimplexRegretMinimizer,
9
9
  RegretMatching,
10
- RegretMatchingPlus,
11
10
  )
12
11
  from noregret.regret_minimizers.regret_minimizers import (
13
12
  DiscountedRegretMinimizer,
14
13
  RegretMinimizer,
15
14
  )
15
+ from noregret.sequence_form_polytopes import SequenceFormPolytope
16
16
 
17
17
 
18
18
  @dataclass
@@ -20,7 +20,7 @@ class SequenceFormPolytopeRegretMinimizer(RegretMinimizer, ABC):
20
20
  """Abstract base class for regret minimizers operating over
21
21
  sequence-form polytopes.
22
22
  """
23
- sequence_form_polytope: Any
23
+ sequence_form_polytope: SequenceFormPolytope
24
24
  """Sequence-form polytope."""
25
25
  _: KW_ONLY
26
26
  previous_behavioral_strategy: Any = 0.0
@@ -73,8 +73,6 @@ class SequenceFormPolytopeRegretMinimizer(RegretMinimizer, ABC):
73
73
  @dataclass
74
74
  class CounterfactualRegretMinimization(SequenceFormPolytopeRegretMinimizer):
75
75
  """Class for counterfactual regret minimization (CFR)."""
76
- regret_minimizer_type: Any = RegretMatching
77
- """Regret minimizer type."""
78
76
 
79
77
  def _theta(self, m):
80
78
  np = self.kernel.numpy
@@ -99,11 +97,9 @@ class CounterfactualRegretMinimization(SequenceFormPolytopeRegretMinimizer):
99
97
 
100
98
  def output(self, prediction=False):
101
99
  theta = self._theta(prediction)
102
- normalize = self.sequence_form_polytope.normalize
103
- self.next_behavioral_strategy = normalize(theta)
104
- self.next_strategy = self.sequence_form_polytope.to_sequence_form(
105
- self.next_behavioral_strategy,
106
- )
100
+ b = self.sequence_form_polytope.normalize(theta)
101
+ self.next_behavioral_strategy = b
102
+ self.next_strategy = self.sequence_form_polytope.to_sequence_form(b)
107
103
 
108
104
  return self.next_strategy
109
105
 
@@ -111,7 +107,6 @@ class CounterfactualRegretMinimization(SequenceFormPolytopeRegretMinimizer):
111
107
  @dataclass
112
108
  class CounterfactualRegretMinimizationPlus(CounterfactualRegretMinimization):
113
109
  """Class for counterfactual regret minimization+ (CFR+)."""
114
- regret_minimizer_type: Any = RegretMatchingPlus
115
110
  _: KW_ONLY
116
111
  floored_cumulative_counterfactual_regrets: Any = 0.0
117
112
  """Floored cumulative counterfactual regrets."""
@@ -156,7 +151,6 @@ class DiscountedCounterfactualRegretMinimization(
156
151
  DiscountedRegretMinimizer,
157
152
  ):
158
153
  """Class for discounted counterfactual regret minimization (DCFR)."""
159
- regret_minimizer_type: Any = DiscountedRegretMatching
160
154
  _: KW_ONLY
161
155
  discounted_counterfactual_regrets: Any = 0.0
162
156
  """Discounted counterfactual regrets."""
@@ -195,3 +189,98 @@ class DiscountedCounterfactualRegretMinimization(
195
189
  T = self.iteration_count
196
190
  r[r > 0] *= T ** self.alpha / (T ** self.alpha + 1)
197
191
  r[r < 0] *= T ** self.beta / (T ** self.beta + 1)
192
+
193
+
194
@dataclass
class CounterfactualRegretMinimization2(SequenceFormPolytopeRegretMinimizer):
    """Class for counterfactual regret minimization (CFR).

    This is an alternative to :class:`CounterfactualRegretMinimization`.

    Do **not** use this class unless it is absolutely necessary.

    Main advantage: Arbitrary local regret minimizers.

    Main disadvantage: **Slow** and unparallelizable.
    """
    regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer] = (
        RegretMatching
    )
    """Regret minimizer type."""
    _: KW_ONLY
    regret_minimizers: dict[str, ProbabilitySimplexRegretMinimizer] = field(
        default_factory=dict,
        init=False,
    )
    """Regret minimizers."""

    def __post_init__(self):
        super().__post_init__()

        R_type = self.regret_minimizer_type
        A = self.sequence_form_polytope.actions

        # One local regret minimizer per decision point, sized by the
        # number of actions available there.
        for j in self.sequence_form_polytope.decision_points:
            self.regret_minimizers[j] = R_type(self.kernel, len(A[j]))

    def _per_decision_point(self, values):
        """Split a per-sequence vector into one array per decision point.

        :param values: Vector indexed like the non-empty sequences.
        :return: Mapping from each decision point to the array of
                 entries for its actions, in action order.
        """
        np = self.kernel.numpy
        dtype = self.kernel.data_type
        A = self.sequence_form_polytope.actions
        seqs = self.sequence_form_polytope.non_empty_sequences

        return {
            j: np.array([values[seqs.index((j, a))] for a in A[j]], dtype)
            for j in self.sequence_form_polytope.decision_points
        }

    def output(self, prediction=False):
        np = self.kernel.numpy
        dtype = self.kernel.data_type
        A = self.sequence_form_polytope.actions
        seqs = self.sequence_form_polytope.non_empty_sequences

        if isinstance(prediction, bool):
            # A plain flag is forwarded unchanged to every local regret
            # minimizer.
            J = self.sequence_form_polytope.decision_points
            predictions = {j: prediction for j in J}
        else:
            # NOTE(review): ``counterfactual_utilities`` is called with a
            # single argument here but with (strategy, utility) in
            # ``observe`` -- confirm this is intended.
            m = self.sequence_form_polytope.counterfactual_utilities(
                prediction,
            )
            predictions = self._per_decision_point(m)

        b = np.empty(len(seqs), dtype)

        # Assemble the behavioral strategy from the local outputs.
        for j, R in self.regret_minimizers.items():
            x = R.output(predictions[j])

            for a, p in zip(A[j], x):
                b[seqs.index((j, a))] = p

        self.next_behavioral_strategy = b
        self.next_strategy = self.sequence_form_polytope.to_sequence_form(b)

        return self.next_strategy

    def observe(self, utility):
        super().observe(utility)

        u = self.sequence_form_polytope.counterfactual_utilities(
            self.previous_behavioral_strategy,
            utility,
        )
        counterfactual_utilities = self._per_decision_point(u)

        # Each local regret minimizer observes only the counterfactual
        # utilities of its own decision point.
        for j, R in self.regret_minimizers.items():
            R.observe(counterfactual_utilities[j])
@@ -127,13 +127,13 @@ class SequenceFormPolytope:
127
127
 
128
128
  :return: Non-empty sequences.
129
129
  """
130
- sequences = OrderedSet()
130
+ seqs = OrderedSet()
131
131
 
132
132
  for j in self.decision_points:
133
133
  for a in self.actions[j]:
134
- sequences.add((j, a))
134
+ seqs.add((j, a))
135
135
 
136
- return sequences
136
+ return seqs
137
137
 
138
138
  @property
139
139
  def row_count(self):
@@ -110,7 +110,7 @@ def symmetric_regret_minimization(
110
110
  """
111
111
  np = game.kernel.numpy
112
112
 
113
- if not game.is_symmetric():
113
+ if not game.is_symmetric:
114
114
  raise ValueError('game is asymmetric')
115
115
 
116
116
  R = regret_minimizer
@@ -37,8 +37,8 @@ class ExtensiveFormGameTestCase(TestCase):
37
37
  nr.to_efg(nr.RockPaperScissors(KERNEL)),
38
38
  nr.to_efg(nr.RockPaperScissorsPlus(KERNEL)),
39
39
  nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
40
- nr.from_open_spiel(KERNEL, 'kuhn_poker'),
41
- nr.from_open_spiel(KERNEL, 'leduc_poker'),
40
+ nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
41
+ nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
42
42
  )
43
43
 
44
44
  def test_serialization(self):
@@ -14,8 +14,8 @@ class LinearProgrammingTestCase(TestCase):
14
14
  (nr.to_efg(nr.RockPaperScissors(KERNEL)), 0),
15
15
  (nr.to_efg(nr.RockPaperScissorsPlus(KERNEL)), 0),
16
16
  (nr.to_efg(nr.RockPaperSuperscissors(KERNEL)), 0),
17
- (nr.from_open_spiel(KERNEL, 'kuhn_poker'), -1 / 18),
18
- (nr.from_open_spiel(KERNEL, 'leduc_poker'), -0.08560642407800048),
17
+ (nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')), -1 / 18),
18
+ (nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')), -0.08560642408),
19
19
  )
20
20
 
21
21
  def test_linear_programming(self):
@@ -82,7 +82,7 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
82
82
  dtype = self.KERNEL.data_type
83
83
 
84
84
  for game, value in self.SYMMETRIC_GAME_VALUES:
85
- assert game.is_symmetric()
85
+ assert game.is_symmetric
86
86
  assert isinstance(game, nr.NFG_2p0s)
87
87
 
88
88
  for R_type in self.REGRET_MINIMIZER_TYPES:
@@ -110,8 +110,8 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
110
110
  (nr.to_efg(nr.RockPaperScissors(KERNEL)), 0),
111
111
  (nr.to_efg(nr.RockPaperScissorsPlus(KERNEL)), 0),
112
112
  (nr.to_efg(nr.RockPaperSuperscissors(KERNEL)), 0),
113
- (nr.from_open_spiel(KERNEL, 'kuhn_poker'), -1 / 18),
114
- (nr.from_open_spiel(KERNEL, 'leduc_poker'), -0.08560642407800048),
113
+ (nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')), -1 / 18),
114
+ (nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')), -0.08560642408),
115
115
  )
116
116
  REGRET_MINIMIZION_PARAMETERS = (
117
117
  (partial(nr.CFR, KERNEL), False, False),
@@ -150,5 +150,42 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
150
150
  self.assertEqual(v.dtype, dtype)
151
151
 
152
152
 
153
class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
    """Compare the solutions found by ``nr.CFR`` and ``nr.CFR2``."""

    KERNEL = nr.FloatingPointKernel()
    GAMES = (
        nr.to_efg(nr.MatchingPennies(KERNEL)),
        nr.to_efg(nr.RockPaperScissors(KERNEL)),
        nr.to_efg(nr.RockPaperScissorsPlus(KERNEL)),
        nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
        nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
        nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
    )
    # Number of decimal places used by the ``assertAlmostEqual`` checks.
    PLACES = 6

    def test_equivalence(self):
        """Check both minimizers agree on exploitability and game value."""
        for game in self.GAMES:
            assert isinstance(game, nr.EFG_2p0s)

            outcomes = []

            # Solve with CFR first, then CFR2, recording the exploitability
            # and expected row utility of each average strategy profile.
            for R_type in (nr.CFR, nr.CFR2):
                x_bar, y_bar = nr.regret_minimization(
                    game,
                    R_type(self.KERNEL, game.row_sequence_form_polytope),
                    R_type(self.KERNEL, game.column_sequence_form_polytope),
                    progress_bar=False,
                )
                exploitability = game.exploitability(x_bar, y_bar)
                value = game.expected_row_utility(x_bar, y_bar)

                outcomes.append((exploitability, value))

            (e, v), (e2, v2) = outcomes

            self.assertAlmostEqual(e, e2, self.PLACES)
            self.assertAlmostEqual(v, v2, self.PLACES)
188
+
189
+
153
190
  if __name__ == '__main__':
154
191
  main() # pragma: no cover
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: noregret
3
- Version: 0.0.0.dev4
3
+ Version: 0.0.0.dev5
4
4
  Summary: No-regret learning dynamics
5
5
  Home-page: https://github.com/uoftcprg/noregret
6
6
  Author: Universal, Open, Free, and Transparent Computer Poker Research Group
@@ -94,8 +94,8 @@ The code snippet below demonstrates how one can solve games via regret minimizat
94
94
  KERNEL = nr.FloatingPointKernel()
95
95
  GAMES = {
96
96
  'Rock paper superscissors': nr.to_efg(nr.RockPaperSuperscissors(KERNEL)),
97
- 'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
98
- 'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
97
+ 'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
98
+ 'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
99
99
  }
100
100
  PARAMETERS = {
101
101
  'CFR': (nr.CFR, False, False),
@@ -180,7 +180,7 @@ The code snippet below demonstrates how one can solve games while leveraging GPU
180
180
  import noregret as nr
181
181
 
182
182
  KERNEL = nr.CUDAKernel()
183
- GAME = nr.from_open_spiel(KERNEL, 'liars_dice')
183
+ GAME = nr.to_efg(KERNEL, nr.from_open_spiel('liars_dice'))
184
184
  PARAMETERS = nr.CFR, True, False
185
185
 
186
186
 
@@ -220,8 +220,8 @@ The code snippet below demonstrates how one can solve games via linear programmi
220
220
  KERNEL = nr.FloatingPointKernel()
221
221
  GAMES = {
222
222
  'Rock paper superscissors': nr.RockPaperSuperscissors(KERNEL),
223
- 'Kuhn poker': nr.from_open_spiel(KERNEL, 'kuhn_poker'),
224
- 'Leduc poker': nr.from_open_spiel(KERNEL, 'leduc_poker'),
223
+ 'Kuhn poker': nr.to_efg(KERNEL, nr.from_open_spiel('kuhn_poker')),
224
+ 'Leduc poker': nr.to_efg(KERNEL, nr.from_open_spiel('leduc_poker')),
225
225
  }
226
226
 
227
227
 
@@ -236,57 +236,6 @@ The code snippet below demonstrates how one can solve games via linear programmi
236
236
  if __name__ == '__main__':
237
237
  main()
238
238
 
239
- Conduct Research in Online Convex Optimization
240
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
241
-
242
- The code snippet below reproduces Leme, Piliouras, and Schneider (NeurIPS, 2024) using NoRegret.
243
-
244
- .. code-block:: python
245
-
246
- from functools import partial
247
-
248
- import matplotlib.pyplot as plt
249
- import noregret as nr
250
-
251
- KERNEL = nr.FloatingPointKernel()
252
- GAME = nr.RockPaperScissorsPlus(KERNEL)
253
- R_type = partial(nr.MWU, learning_rate=1e-3)
254
-
255
-
256
- def main():
257
- RM = R_type(KERNEL, GAME.row_dimension, is_time_symmetric=False)
258
- BM_RM = nr.BM(KERNEL, GAME.row_dimension, R_type, is_time_symmetric=False)
259
-
260
- nr.symmetric_regret_minimization(GAME, RM, iteration_count=100000)
261
- nr.symmetric_regret_minimization(GAME, BM_RM, iteration_count=100000)
262
- x, _ = nr.linear_programming(GAME)
263
-
264
- strategies = KERNEL.numpy.array(RM.strategies)
265
-
266
- plt.clf()
267
- plt.plot(strategies[:, 0], strategies[:, 1])
268
- plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
269
- plt.plot(*x[:2], 'ro')
270
- plt.xlabel('Probability of action 1')
271
- plt.ylabel('Probability of action 2')
272
- plt.title('No-external regret dynamics')
273
- plt.show()
274
-
275
- strategies = KERNEL.numpy.array(BM_RM.strategies)
276
-
277
- plt.clf()
278
- plt.plot(strategies[:, 0], strategies[:, 1])
279
- plt.plot(strategies[-1, 0], strategies[-1, 1], 'bo')
280
- plt.plot(*x[:2], 'ro')
281
- plt.xlabel('Probability of action 1')
282
- plt.ylabel('Probability of action 2')
283
- plt.title('No-swap regret dynamics')
284
- plt.show()
285
-
286
-
287
- if __name__ == '__main__':
288
- main()
289
-
290
239
  Testing and Validation
291
240
  ----------------------
292
241
 
@@ -11,6 +11,7 @@ noregret.egg-info/dependency_links.txt
11
11
  noregret.egg-info/requires.txt
12
12
  noregret.egg-info/top_level.txt
13
13
  noregret/games/__init__.py
14
+ noregret/games/black_box.py
14
15
  noregret/games/games.py
15
16
  noregret/games/multilinear.py
16
17
  noregret/games/utilities.py
@@ -4,7 +4,7 @@ from setuptools import find_packages, setup
4
4
 
5
5
  setup(
6
6
  name='noregret',
7
- version='0.0.0.dev4',
7
+ version='0.0.0.dev5',
8
8
  description='No-regret learning dynamics',
9
9
  long_description=open('README.rst').read(),
10
10
  long_description_content_type='text/x-rst',
@@ -1,141 +0,0 @@
1
- from collections import defaultdict
2
- from functools import partial
3
- from itertools import starmap
4
-
5
- from ordered_set import OrderedSet
6
- from pyspiel import GameType, load_game
7
- from scipy.sparse import lil_array
8
-
9
- from noregret.games.normal_form.games import (
10
- NormalFormGame,
11
- TwoPlayerNormalFormGame,
12
- TwoPlayerZeroSumNormalFormGame,
13
- )
14
- from noregret.games.extensive_form.games import (
15
- ExtensiveFormGame,
16
- TwoPlayerExtensiveFormGame,
17
- TwoPlayerZeroSumExtensiveFormGame,
18
- )
19
- from noregret.sequence_form_polytopes import SequenceFormPolytope
20
-
21
-
22
def _nfg2efg(game, decision_points):
    """Convert a normal-form game into an extensive-form game.

    Each player receives a sequence-form polytope with exactly one
    decision point, whose actions are that player's normal-form actions.
    The payoff array is enlarged by one entry along every player axis,
    with the original payoffs stored from index 1 onwards and zeros
    elsewhere.

    :param game: Normal-form game.
    :param decision_points: Callable mapping a player index to the label
                            of that player's decision point.
    :return: Extensive-form game of the type matching ``game``.
    """
    kernel = game.kernel
    xp = kernel.numpy
    sparse = kernel.scipy.sparse
    data_type = kernel.data_type
    dims = game.dimensions
    padded_shape = tuple(n + 1 for n in dims)
    # Index 0 along each axis stays zero; presumably it stands for the
    # empty sequence -- TODO confirm against SequenceFormPolytope.
    interior = tuple(slice(1, None) for _ in dims)

    if isinstance(game, TwoPlayerZeroSumNormalFormGame):
        type_ = TwoPlayerZeroSumExtensiveFormGame
        payoffs = xp.zeros(padded_shape, data_type)
        payoffs[interior] = game.payoffs
    else:
        if isinstance(game, TwoPlayerNormalFormGame):
            type_ = TwoPlayerExtensiveFormGame
        else:
            type_ = ExtensiveFormGame

        # General games carry a leading player axis that is not padded.
        payoffs = xp.zeros((game.player_count, *padded_shape), data_type)
        payoffs[(slice(None), *interior)] = game.payoffs

    payoffs = sparse.csr_array(payoffs)
    polytopes = []

    for player, player_actions in enumerate(game.actions):
        label = decision_points(player)

        polytopes.append(
            SequenceFormPolytope(
                kernel,
                {label: player_actions},
                {label: None},
            ),
        )

    return type_(kernel, payoffs, tuple(polytopes))
56
-
57
-
58
def to_extensive_form(game, decision_points=str):
    """Convert a given game to an extensive-form game.

    Only normal-form games are supported.

    :param game: Game.
    :param decision_points: Callable mapping a player index to a decision
                            point label, defaults to ``str``.
    :return: Extensive-form game.
    :raises ValueError: If the game is not a normal-form game.
    """
    if not isinstance(game, NormalFormGame):
        raise ValueError('unknown game')

    return _nfg2efg(game, decision_points)
71
-
72
-
73
def from_open_spiel(kernel, game):
    """Load a game from OpenSpiel.

    The game tree is traversed exhaustively to collect each player's
    decision points, actions, and parent sequences, along with the
    chance-weighted payoffs at the terminal states.

    :param kernel: Kernel.
    :param game: Game in OpenSpiel, as accepted by ``pyspiel.load_game``.
    :return: Game.
    :raises NotImplementedError: If the game is not a two-player zero-sum
                                 game.
    """
    data_type = kernel.data_type
    sparse = kernel.scipy.sparse
    game = load_game(game)
    player_count = game.num_players()
    actions = [defaultdict(OrderedSet) for _ in range(player_count)]
    parent_sequences = [{} for _ in range(player_count)]
    raw_payoffs = [defaultdict(int) for _ in range(player_count)]

    def visit(state, reach, sequences):
        # ``sequences`` holds the last (decision point, action) pair of
        # each player on the current path, or ``None`` if they have not
        # acted yet; ``reach`` is the product of the chance probabilities.
        if state.is_terminal():
            key = tuple(sequences)

            # NOTE(review): this reads rewards() at the terminal state
            # only -- confirm that matches the full return for the games
            # being loaded.
            for player, reward in enumerate(state.rewards()):
                raw_payoffs[player][key] += reach * reward

            return

        if state.is_chance_node():
            for outcome, probability in state.chance_outcomes():
                visit(state.child(outcome), probability * reach, sequences)

            return

        player = state.current_player()
        infoset = state.information_state_string()
        parent_sequences[player][infoset] = sequences[player]

        for action in state.legal_actions():
            child = state.child(action)
            label = state.action_to_string(action)
            child_sequences = (
                sequences[:player]
                + [(infoset, label)]
                + sequences[player + 1:]
            )

            actions[player][infoset].add(label)
            visit(child, reach, child_sequences)

    visit(game.new_initial_state(), 1, [None] * player_count)

    polytopes = tuple(
        SequenceFormPolytope(kernel, player_actions, player_parents)
        for player_actions, player_parents in zip(actions, parent_sequences)
    )
    dimensions = tuple(polytope.column_count for polytope in polytopes)
    is_two_player_zero_sum = (
        player_count == 2
        and game.get_type().utility == GameType.Utility.ZERO_SUM
    )

    if not is_two_player_zero_sum:
        raise NotImplementedError

    # Only the first player's payoffs are stored for the zero-sum game.
    table = lil_array(dimensions, dtype=data_type)

    for sequences, payoff in raw_payoffs[0].items():
        position = tuple(
            polytope.column(sequence)
            for polytope, sequence in zip(polytopes, sequences)
        )
        table[position] = payoff

    payoffs = sparse.csr_array(table)

    return TwoPlayerZeroSumExtensiveFormGame(kernel, payoffs, polytopes)
File without changes
File without changes