noregret 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noregret/__init__.py +0 -0
- noregret/games.py +632 -0
- noregret/regret_minimizers.py +645 -0
- noregret/utilities.py +308 -0
- noregret-0.0.0.dev0.dist-info/METADATA +106 -0
- noregret-0.0.0.dev0.dist-info/RECORD +9 -0
- noregret-0.0.0.dev0.dist-info/WHEEL +5 -0
- noregret-0.0.0.dev0.dist-info/licenses/LICENSE +21 -0
- noregret-0.0.0.dev0.dist-info/top_level.txt +1 -0
noregret/__init__.py
ADDED
|
File without changes
|
noregret/games.py
ADDED
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from functools import partial
|
|
4
|
+
from itertools import count, permutations
|
|
5
|
+
from math import factorial
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from scipy.sparse import lil_array
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from noregret.utilities import (
|
|
12
|
+
Serializable,
|
|
13
|
+
split,
|
|
14
|
+
TreeFormSequentialDecisionProcess,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Game(ABC):
    """Abstract base class of the games in this module.

    Subclasses supply the player count, the per-player strategy
    dimension, utilities, (correlated) values, and best responses. The
    aggregate quantities defined here (``dimensions``, ``values``,
    ``correlated_values``, ``nash_gap`` and ``cce_gap``) are derived
    from those primitives.
    """

    def __post_init__(self):
        # Dataclass hook: each game validates itself once constructed.
        self._verify()

    def _verify(self, **kwargs):
        """Validate the game.

        The base implementation does nothing; subclasses extend it.

        :param kwargs: Extra validation options, ignored here.
        :return: ``None``.
        """

    @property
    @abstractmethod
    def player_count(self):
        """Return the number of players."""

    @abstractmethod
    def dimension(self, player):
        """Return the strategy dimension of a player.

        :param player: The player index.
        """

    @property
    def dimensions(self):
        """Return the strategy dimension of every player as an array."""
        return np.array(
            [self.dimension(player) for player in range(self.player_count)],
        )

    @abstractmethod
    def utility(self, player, *opponent_strategies):
        """Return the utility of a player given the opponents' strategies.

        :param player: The player index.
        :param opponent_strategies: The strategies of the other players.
        """

    @abstractmethod
    def value(self, player, *strategies):
        """Return the value of a player under a strategy profile.

        :param player: The player index.
        :param strategies: One strategy per player.
        """

    def values(self, *strategies):
        """Return every player's value under a strategy profile.

        :param strategies: One strategy per player.
        :return: An array holding ``value(i, *strategies)`` for each
                 player index ``i``.
        """
        players = range(self.player_count)

        return np.array(
            [self.value(player, *strategies) for player in players],
        )

    @abstractmethod
    def correlated_value(self, player, *strategies):
        """Return the correlated value of a player.

        :param player: The player index.
        :param strategies: One collection of strategies per player.
        """

    def correlated_values(self, *strategies):
        """Return every player's correlated value.

        :param strategies: One collection of strategies per player.
        :return: An array holding ``correlated_value(i, *strategies)``
                 for each player index ``i``.
        """
        players = range(self.player_count)

        return np.array(
            [self.correlated_value(player, *strategies) for player in players],
        )

    @abstractmethod
    def best_response(self, player, *opponent_strategies):
        """Return a best response of a player.

        The callers in this class unpack the result as a pair and use
        its second item as the best-response value.

        :param player: The player index.
        :param opponent_strategies: The strategies of the other players.
        """

    def nash_gap(self, *strategies):
        """Return the summed best-response improvement of all players.

        For each player, the difference between the best-response value
        against the other players' strategies and the player's value
        under the given profile is accumulated.

        :param strategies: One strategy per player.
        :return: The sum of the per-player differences.
        """

        def improvements():
            for player, value in enumerate(self.values(*strategies)):
                others = strategies[:player] + strategies[player + 1:]
                _, best_value = self.best_response(player, *others)

                yield best_value - value

        return sum(improvements())

    def cce_gap(self, *strategies):
        """Return the summed deviation gain against averaged opponents.

        Each player's best response is computed against the other
        players' strategies averaged over axis 0, and compared with the
        player's correlated value.

        :param strategies: One collection of strategies per player,
                           averaged over axis 0.
        :return: The sum of the per-player differences.
        """
        averages = [np.mean(strategy, axis=0) for strategy in strategies]

        def improvements():
            correlated_values = self.correlated_values(*strategies)

            for player, value in enumerate(correlated_values):
                others = averages[:player] + averages[player + 1:]
                _, best_value = self.best_response(player, *others)

                yield best_value - value

        return sum(improvements())
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass
class TwoPlayerGame(Game, ABC):
    """Game between exactly two players.

    Player 0 is the row player and player 1 is the column player. Both
    utility matrices are indexed by the row player along axis 0 and by
    the column player along axis 1.
    """

    def _verify(self, **kwargs):
        """Validate the game and require exactly two players.

        :param kwargs: Validation options forwarded to the parent.
        :raises ValueError: If the player count is not 2.
        """
        super()._verify(**kwargs)

        if self.player_count != 2:
            raise ValueError('number of players not 2')

    @property
    @abstractmethod
    def row_utilities(self):
        """Return the utility matrix of the row player."""

    @property
    @abstractmethod
    def column_utilities(self):
        """Return the utility matrix of the column player."""

    def dimension(self, player):
        """Return the strategy dimension of a player.

        :param player: ``0`` for the row player, ``1`` for the column
                       player.
        :raises ValueError: If the player is neither 0 nor 1.
        """
        if player == 0:
            return self.row_dimension

        if player == 1:
            return self.column_dimension

        raise ValueError(f'Player {player} does not exist')

    @property
    def row_dimension(self):
        """Return the size of axis 0 of the row utility matrix."""
        return self.row_utilities.shape[0]

    @property
    def column_dimension(self):
        """Return the size of axis 1 of the row utility matrix."""
        return self.row_utilities.shape[1]

    def utility(self, player, opponent_strategy):
        """Return a player's utility vector against the opponent.

        :param player: ``0`` for the row player, ``1`` for the column
                       player.
        :param opponent_strategy: The strategy of the other player.
        :raises ValueError: If the player is neither 0 nor 1.
        """
        if player == 0:
            return self.row_utility(opponent_strategy)

        if player == 1:
            return self.column_utility(opponent_strategy)

        raise ValueError(f'Player {player} does not exist')

    def row_utility(self, column_strategy):
        """Return the row utility matrix times the column strategy."""
        return self.row_utilities @ column_strategy

    def column_utility(self, row_strategy):
        """Return the row strategy times the column utility matrix."""
        return row_strategy @ self.column_utilities

    def value(self, player, row_strategy, column_strategy):
        """Return a player's value under a strategy profile.

        :param player: ``0`` for the row player, ``1`` for the column
                       player.
        :param row_strategy: The row player's strategy.
        :param column_strategy: The column player's strategy.
        :raises ValueError: If the player is neither 0 nor 1.
        """
        if player == 0:
            return self.row_value(row_strategy, column_strategy)

        if player == 1:
            return self.column_value(row_strategy, column_strategy)

        raise ValueError(f'Player {player} does not exist')

    def row_value(self, row_strategy, column_strategy):
        """Return the bilinear form of the row utility matrix."""
        return row_strategy @ self.row_utilities @ column_strategy

    def column_value(self, row_strategy, column_strategy):
        """Return the bilinear form of the column utility matrix."""
        return row_strategy @ self.column_utilities @ column_strategy

    def correlated_value(self, player, row_strategies, column_strategies):
        """Return a player's correlated value.

        The strategy collections are forwarded unchanged to
        ``correlated_row_value`` or ``correlated_column_value``.

        :param player: ``0`` for the row player, ``1`` for the column
                       player.
        :param row_strategies: The row player's strategies.
        :param column_strategies: The column player's strategies.
        :raises ValueError: If the player is neither 0 nor 1.
        """
        if player == 0:
            return self.correlated_row_value(
                row_strategies,
                column_strategies,
            )

        if player == 1:
            return self.correlated_column_value(
                row_strategies,
                column_strategies,
            )

        raise ValueError(f'Player {player} does not exist')

    def correlated_row_value(self, row_strategies, column_strategies):
        """Return the row player's correlated value.

        The row strategies are multiplied by the row utility matrix,
        weighted elementwise by the column strategies, summed over
        axis 1, and the resulting sums are averaged.
        """
        weighted = (row_strategies @ self.row_utilities) * column_strategies

        return weighted.sum(1).mean()

    def correlated_column_value(self, row_strategies, column_strategies):
        """Return the column player's correlated value.

        Same computation as ``correlated_row_value`` but with the
        column utility matrix.
        """
        weighted = (
            (row_strategies @ self.column_utilities) * column_strategies
        )

        return weighted.sum(1).mean()

    def best_response(self, player, opponent_strategy):
        """Return a player's best response to the opponent.

        :param player: ``0`` for the row player, ``1`` for the column
                       player.
        :param opponent_strategy: The strategy of the other player.
        :raises ValueError: If the player is neither 0 nor 1.
        """
        if player == 0:
            return self.row_best_response(opponent_strategy)

        if player == 1:
            return self.column_best_response(opponent_strategy)

        raise ValueError(f'Player {player} does not exist')

    @abstractmethod
    def row_best_response(self, column_strategy):
        """Return the row player's best response to a column strategy."""

    @abstractmethod
    def column_best_response(self, row_strategy):
        """Return the column player's best response to a row strategy."""
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@dataclass
class TwoPlayerZeroSumGame(TwoPlayerGame, ABC):
    """Two-player game whose column utilities negate the row utilities."""

    @property
    def column_utilities(self):
        """Return the negated row utility matrix."""
        return -self.row_utilities

    def values(self, row_strategy, column_strategy):
        """Return both players' values under a strategy profile.

        Only the row value is computed; the column value is its
        negation.

        :param row_strategy: The row player's strategy.
        :param column_strategy: The column player's strategy.
        :return: An array ``(row value, -row value)``.
        """
        row_value = self.row_value(row_strategy, column_strategy)

        return np.array((row_value, -row_value))

    def correlated_values(self, row_strategies, column_strategies):
        """Return both players' correlated values.

        Only the row player's correlated value is computed; the column
        player's is its negation.

        :param row_strategies: The row player's strategies.
        :param column_strategies: The column player's strategies.
        :return: An array ``(row value, -row value)``.
        """
        row_value = self.correlated_row_value(
            row_strategies,
            column_strategies,
        )

        return np.array((row_value, -row_value))

    def nash_gap(self, row_strategy, column_strategy):
        """Return the sum of both players' best-response values.

        :param row_strategy: The row player's strategy.
        :param column_strategy: The column player's strategy.
        :return: The row best-response value against the column
                 strategy plus the column best-response value against
                 the row strategy.
        """
        _, row_best_value = self.row_best_response(column_strategy)
        _, column_best_value = self.column_best_response(row_strategy)

        return row_best_value + column_best_value

    def exploitability(self, row_strategy, column_strategy):
        """Return half of the Nash gap of a strategy profile."""
        gap = self.nash_gap(row_strategy, column_strategy)

        return gap / 2
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@dataclass
class NormalFormGame(Serializable, Game):
    """Normal-form game.

    Each player optimizes over the probability simplex.
    """

    @classmethod
    def deserialize(cls, raw_data):
        """Build a game from its serialized form.

        :param raw_data: A mapping with the keys ``'actions'`` and
                         ``'utilities'``; the utilities are converted
                         to a NumPy array.
        :return: The deserialized game.
        """
        return cls(raw_data['actions'], np.array(raw_data['utilities']))

    # One collection of actions per player.
    actions: Any
    # Utility array; its shape is checked in ``_verify``.
    utilities: Any
    # Per-player mapping from action to its position; filled in
    # ``__post_init__`` and therefore not a constructor argument.
    indices: Any = field(init=False, default_factory=list)

    def __post_init__(self):
        super().__post_init__()

        # Build one action -> position lookup table per player. The
        # player index itself is not needed, so no ``enumerate``.
        self.indices.extend(
            dict(zip(actions, count())) for actions in self.actions
        )

    def _verify(self, *, utilities_shape=None, **kwargs):
        """Validate that the utilities match the actions and players.

        :param utilities_shape: The expected shape of the utilities.
                                Defaults to one axis per player (sized
                                by that player's action count) followed
                                by an axis of size ``player_count``.
        :param kwargs: Validation options forwarded to the parent.
        :raises ValueError: If the utilities have a different shape.
        """
        super()._verify(**kwargs)

        if utilities_shape is None:
            utilities_shape = (*map(len, self.actions), self.player_count)

        if self.utilities.shape != utilities_shape:
            raise ValueError('utilities do not match actions and players')

    @property
    def player_count(self):
        """Return the number of players (one per action collection)."""
        return len(self.actions)

    def utility(self, player, *opponent_strategies):
        """Not implemented for general normal-form games."""
        raise NotImplementedError

    def value(self, player, *strategies):
        """Not implemented for general normal-form games."""
        raise NotImplementedError

    def correlated_value(self, player, *strategies):
        """Not implemented for general normal-form games."""
        raise NotImplementedError

    def best_response(self, player, *opponent_strategies):
        """Not implemented for general normal-form games."""
        raise NotImplementedError

    def serialize(self):
        """Return the game as a mapping of actions and utilities.

        :return: A dictionary with the keys ``'actions'`` and
                 ``'utilities'`` (the latter converted to nested
                 lists).
        """
        return {'actions': self.actions, 'utilities': self.utilities.tolist()}
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@dataclass
class TwoPlayerNormalFormGame(TwoPlayerGame, NormalFormGame):
    """Normal-form game between a row player and a column player.

    The utilities are read as a 3-axis array whose last axis selects
    the player (0 for row, 1 for column).
    """

    @property
    def row_actions(self):
        """Return the actions of the row player."""
        return self.actions[0]

    @property
    def column_actions(self):
        """Return the actions of the column player."""
        return self.actions[1]

    @property
    def row_indices(self):
        """Return the action lookup table of the row player."""
        return self.indices[0]

    @property
    def column_indices(self):
        """Return the action lookup table of the column player."""
        return self.indices[1]

    @property
    def row_utilities(self):
        """Return the slice of the utilities for the row player."""
        return self.utilities[:, :, 0]

    @property
    def column_utilities(self):
        """Return the slice of the utilities for the column player."""
        return self.utilities[:, :, 1]

    def row_best_response(self, column_strategy):
        """Return a pure best response of the row player.

        :param column_strategy: The column player's strategy.
        :return: A pair of a one-hot strategy over the row actions and
                 the utility of the selected action.
        """
        payoffs = self.row_utility(column_strategy)
        best = payoffs.argmax()
        response = np.zeros(len(self.row_actions))
        response[best] = 1

        return response, payoffs[best]

    def column_best_response(self, row_strategy):
        """Return a pure best response of the column player.

        :param row_strategy: The row player's strategy.
        :return: A pair of a one-hot strategy over the column actions
                 and the utility of the selected action.
        """
        payoffs = self.column_utility(row_strategy)
        best = payoffs.argmax()
        response = np.zeros(len(self.column_actions))
        response[best] = 1

        return response, payoffs[best]
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
@dataclass
class TwoPlayerZeroSumNormalFormGame(
        TwoPlayerZeroSumGame,
        TwoPlayerNormalFormGame,
):
    """Two-player zero-sum (2p0s) normal-form game.

    A single utility matrix is stored, from the viewpoint of the row
    player.
    """

    def _verify(self, **kwargs):
        """Validate the game against a 2-axis utility matrix.

        The expected shape is (row action count, column action count)
        rather than the default with a trailing player axis.

        :param kwargs: Validation options forwarded to the parent.
        """
        expected_shape = len(self.row_actions), len(self.column_actions)

        super()._verify(**kwargs, utilities_shape=expected_shape)

    @property
    def row_utilities(self):
        """Return the stored utility matrix unchanged."""
        return self.utilities
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
@dataclass
class ExtensiveFormGame(Serializable, Game):
    """Extensive-form game (EFG).

    Each player optimizes over the sequence-form polytope. This class
    only stores the data and counts the players; every computation and
    both serialization directions are left to subclasses.
    """

    # One tree-form sequential decision process per player.
    tree_form_sequential_decision_processes: Any
    # Utility data; its layout is defined by subclasses.
    utilities: Any

    @classmethod
    def deserialize(cls, raw_data):
        """Not implemented for general extensive-form games."""
        raise NotImplementedError

    def serialize(self):
        """Not implemented for general extensive-form games."""
        raise NotImplementedError

    @property
    def player_count(self):
        """Return the number of players (one per decision process)."""
        return len(self.tree_form_sequential_decision_processes)

    def utility(self, player, *opponent_strategies):
        """Not implemented for general extensive-form games."""
        raise NotImplementedError

    def value(self, player, *strategies):
        """Not implemented for general extensive-form games."""
        raise NotImplementedError

    def correlated_value(self, player, *strategies):
        """Not implemented for general extensive-form games."""
        raise NotImplementedError

    def best_response(self, player, *opponent_strategies):
        """Not implemented for general extensive-form games."""
        raise NotImplementedError
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
@dataclass
class TwoPlayerExtensiveFormGame(TwoPlayerGame, ExtensiveFormGame):
    """Two-player (2p) extensive-form game (EFG).

    ``utilities`` holds two matrices, the row player's first and the
    column player's second, each indexed by (row sequence, column
    sequence).
    """

    @classmethod
    def deserialize(cls, raw_data):
        """Build a game from its serialized form.

        :param raw_data: A mapping with the keys
                         ``'tree_form_sequential_decision_processes'``
                         and ``'utilities'``. Each utility entry has
                         ``'sequences'`` (one sequence per player) and
                         ``'values'`` (one value per player).
        :return: The deserialized game, with utilities stored as CSR
                 sparse arrays.
        :raises ValueError: If an entry does not carry exactly two
                            values, or if its number of sequences does
                            not match the number of decision processes.
        """
        tfsdps = TreeFormSequentialDecisionProcess.deserialize_all(
            raw_data['tree_form_sequential_decision_processes'],
        )
        shape = tuple(len(tfsdp.sequences) for tfsdp in tfsdps)
        # LIL format is used while filling entries one by one.
        row_utilities = lil_array(shape)
        column_utilities = lil_array(shape)

        for raw_utility in raw_data['utilities']:
            if len(raw_utility['values']) != 2:
                raise ValueError('utility is not of a 2-player game')

            # ``strict=True`` rejects entries with too few or too many
            # sequences. Without it a short entry would produce a
            # 1-tuple index and overwrite a whole row, and a long entry
            # would be silently truncated.
            indices = tuple(
                tfsdp.indices[tuple(sequence)]
                for tfsdp, sequence in zip(
                    tfsdps,
                    raw_utility['sequences'],
                    strict=True,
                )
            )
            row_utilities[indices] = raw_utility['values'][0]
            column_utilities[indices] = raw_utility['values'][1]

        return cls(tfsdps, [row_utilities.tocsr(), column_utilities.tocsr()])

    def _verify(self, **kwargs):
        """Validate that both utility matrices match the sequences.

        :param kwargs: Validation options forwarded to the parent.
        :raises ValueError: If the two matrices differ in shape or do
                            not have shape (row sequence count, column
                            sequence count).
        """
        super()._verify(**kwargs)

        if not (
                self.row_utilities.shape
                == self.column_utilities.shape
                == (len(self.row_sequences), len(self.column_sequences))
        ):
            raise ValueError('utilities do not match sequences')

    @property
    def row_tree_form_sequential_decision_process(self):
        """Return the decision process of the row player."""
        return self.tree_form_sequential_decision_processes[0]

    @property
    def column_tree_form_sequential_decision_process(self):
        """Return the decision process of the column player."""
        return self.tree_form_sequential_decision_processes[1]

    @property
    def row_sequences(self):
        """Return the sequences of the row player."""
        return self.row_tree_form_sequential_decision_process.sequences

    @property
    def column_sequences(self):
        """Return the sequences of the column player."""
        return self.column_tree_form_sequential_decision_process.sequences

    @property
    def row_indices(self):
        """Return the sequence lookup table of the row player."""
        return self.row_tree_form_sequential_decision_process.indices

    @property
    def column_indices(self):
        """Return the sequence lookup table of the column player."""
        return self.column_tree_form_sequential_decision_process.indices

    @property
    def row_utilities(self):
        """Return the utility matrix of the row player."""
        return self.utilities[0]

    @property
    def column_utilities(self):
        """Return the utility matrix of the column player."""
        return self.utilities[1]

    def row_best_response(self, column_strategy):
        """Return the row player's best response to a column strategy.

        The row utility vector against ``column_strategy`` is passed to
        the row decision process's ``sequence_form_best_response``,
        whose result is returned unchanged.
        """
        tfsdp = self.row_tree_form_sequential_decision_process

        return tfsdp.sequence_form_best_response(
            self.row_utility(column_strategy),
        )

    def column_best_response(self, row_strategy):
        """Return the column player's best response to a row strategy.

        The column utility vector against ``row_strategy`` is passed to
        the column decision process's ``sequence_form_best_response``,
        whose result is returned unchanged.
        """
        tfsdp = self.column_tree_form_sequential_decision_process

        return tfsdp.sequence_form_best_response(
            self.column_utility(row_strategy),
        )

    def serialize(self):
        """Return the game as a mapping of processes and utilities.

        Only sequence pairs where at least one player's utility is
        nonzero are written.

        :return: A dictionary with the keys
                 ``'tree_form_sequential_decision_processes'`` and
                 ``'utilities'``.
        """
        tfsdps = self.tree_form_sequential_decision_processes
        raw_tfsdps = [tfsdp.to_list() for tfsdp in tfsdps]
        raw_utilities = []
        # Nonzero wherever either matrix has a nonzero entry.
        abs_utility_sums = abs(self.row_utilities) + abs(self.column_utilities)

        for indices in zip(*abs_utility_sums.nonzero()):
            sequences = [
                tfsdp.sequences[index]
                for tfsdp, index in zip(tfsdps, indices)
            ]
            # ``.item()`` yields plain Python numbers.
            values = (
                self.row_utilities[indices].item(),
                self.column_utilities[indices].item(),
            )

            raw_utilities.append({'sequences': sequences, 'values': values})

        return {
            'tree_form_sequential_decision_processes': raw_tfsdps,
            'utilities': raw_utilities,
        }
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
@dataclass
class TwoPlayerZeroSumExtensiveFormGame(
        TwoPlayerZeroSumGame,
        TwoPlayerExtensiveFormGame,
):
    """Two-player zero-sum (2p0s) extensive-form game (EFG).

    A single utility matrix is stored, from the viewpoint of the row
    player.
    """

    @classmethod
    def deserialize(cls, raw_data):
        """Build a game from its serialized form.

        :param raw_data: A mapping with the keys
                         ``'tree_form_sequential_decision_processes'``
                         and ``'utilities'``. Each utility entry has
                         ``'sequences'`` (one sequence per player) and
                         a single ``'value'``.
        :return: The deserialized game, with utilities stored as a CSR
                 sparse array.
        :raises ValueError: If the number of sequences of an entry does
                            not match the number of decision processes.
        """
        tfsdps = TreeFormSequentialDecisionProcess.deserialize_all(
            raw_data['tree_form_sequential_decision_processes'],
        )
        shape = tuple(len(tfsdp.sequences) for tfsdp in tfsdps)
        # LIL format is used while filling entries one by one.
        utilities = lil_array(shape)

        for raw_utility in raw_data['utilities']:
            # ``strict=True`` rejects entries with too few or too many
            # sequences. Without it a short entry would produce a
            # 1-tuple index and overwrite a whole row, and a long entry
            # would be silently truncated.
            indices = tuple(
                tfsdp.indices[tuple(sequence)]
                for tfsdp, sequence in zip(
                    tfsdps,
                    raw_utility['sequences'],
                    strict=True,
                )
            )
            utilities[indices] = raw_utility['value']

        return cls(tfsdps, utilities.tocsr())

    @property
    def row_utilities(self):
        """Return the stored utility matrix unchanged."""
        return self.utilities

    def serialize(self):
        """Return the game as a mapping of processes and utilities.

        Only the nonzero entries of the utility matrix are written.

        :return: A dictionary with the keys
                 ``'tree_form_sequential_decision_processes'`` and
                 ``'utilities'``.
        """
        tfsdps = self.tree_form_sequential_decision_processes
        raw_tfsdps = [tfsdp.to_list() for tfsdp in tfsdps]
        raw_utilities = []

        for indices in zip(*self.utilities.nonzero()):
            sequences = [
                tfsdp.sequences[index]
                for tfsdp, index in zip(tfsdps, indices)
            ]
            # ``.item()`` yields a plain Python number.
            value = self.utilities[indices].item()

            raw_utilities.append({'sequences': sequences, 'value': value})

        return {
            'tree_form_sequential_decision_processes': raw_tfsdps,
            'utilities': raw_utilities,
        }
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
@dataclass
class SymmetrizedGame(Game):
    """Symmetrized game.

    Each player optimizes over the cartesian product of probability
    simplices: a strategy here is cut by ``split`` according to the
    wrapped game's dimensions, and the piece at position ``i`` is used
    when the player takes role ``i`` of the wrapped game. Utilities and
    values are averaged over every assignment of players to roles.
    """

    # The wrapped game being symmetrized.
    game: Any

    @property
    def player_count(self):
        """Return the player count of the wrapped game."""
        return self.game.player_count

    def dimension(self, player):
        """Return the sum of the wrapped game's dimensions.

        :param player: The player index; the result does not depend on
                       it.
        """
        return sum(self.game.dimensions)

    def utility(self, player, *opponent_strategies):
        """Return a player's utility vector against the opponents.

        :param player: The player index.
        :param opponent_strategies: The strategies of the other
                                    players, in player order.
        :return: The per-role utility vectors of the wrapped game,
                 concatenated in role order and divided by the number
                 of permutations.
        """
        dimensions = self.game.dimensions
        # NOTE(review): ``split`` is assumed to cut a strategy into one
        # piece per role of the wrapped game - confirm in utilities.
        strategies = [
            split(opponent_strategy, dimensions)
            for opponent_strategy in opponent_strategies
        ]
        # Placeholder so that list positions equal player indices.
        strategies.insert(player, None)

        utilities = [0] * self.player_count

        # ``permutation[i]`` is the player that takes role ``i``.
        for permutation in permutations(range(self.player_count)):
            role = permutation.index(player)
            # Out-of-place addition avoids a casting error when
            # contributions of different dtypes are accumulated.
            utilities[role] = utilities[role] + self.game.utility(
                role,
                *(
                    strategies[opponent][opponent_role]
                    for opponent_role, opponent in enumerate(permutation)
                    if opponent != player
                ),
            )

        # Out-of-place division: an in-place ``/=`` raises when the
        # concatenated array has an integer dtype.
        return np.concatenate(utilities) / factorial(self.player_count)

    def value(self, player, *strategies):
        """Return a player's value under a strategy profile.

        :param player: The player index.
        :param strategies: One strategy per player.
        :return: The wrapped game's value for the role taken by
                 ``player``, averaged over all permutations.
        """
        player_count = self.player_count
        dimensions = self.game.dimensions
        pieces = [
            split(strategies[i], dimensions) for i in range(player_count)
        ]
        value = 0

        # ``permutation[i]`` is the player that takes role ``i``.
        for permutation in permutations(range(player_count)):
            value += self.game.value(
                permutation.index(player),
                *(
                    pieces[owner][role]
                    for role, owner in enumerate(permutation)
                ),
            )

        return value / factorial(player_count)

    def correlated_value(self, player, *strategies):
        """Not implemented for symmetrized games."""
        raise NotImplementedError

    def best_response(self, player, *opponent_strategies):
        """Not implemented for symmetrized games."""
        raise NotImplementedError
|