noregret 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
noregret/__init__.py ADDED
File without changes
noregret/games.py ADDED
@@ -0,0 +1,632 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass, field
3
+ from functools import partial
4
+ from itertools import count, permutations
5
+ from math import factorial
6
+ from typing import Any
7
+
8
+ from scipy.sparse import lil_array
9
+ import numpy as np
10
+
11
+ from noregret.utilities import (
12
+ Serializable,
13
+ split,
14
+ TreeFormSequentialDecisionProcess,
15
+ )
16
+
17
+
18
@dataclass
class Game(ABC):
    """Game.

    Abstract interface shared by the games in this module. Players are
    addressed by zero-based index. Concrete subclasses provide the
    utilities, values and best responses that the gap computations use.
    """

    def __post_init__(self):
        # Validate right after the dataclass-generated ``__init__`` ran.
        self._verify()

    def _verify(self, **kwargs):
        """Validate the game; the base implementation accepts anything."""

    @property
    @abstractmethod
    def player_count(self):
        """Return the number of players."""

    @abstractmethod
    def dimension(self, player):
        """Return the strategy dimension of ``player``."""

    @property
    def dimensions(self):
        """Return the dimension of every player, in player order."""
        players = range(self.player_count)

        return np.array([self.dimension(player) for player in players])

    @abstractmethod
    def utility(self, player, *opponent_strategies):
        """Return the utility of ``player`` against the given opponents."""

    @abstractmethod
    def value(self, player, *strategies):
        """Return the value of ``player`` under ``strategies``."""

    def values(self, *strategies):
        """Return the value of every player as an array."""
        players = range(self.player_count)

        return np.array(
            [self.value(player, *strategies) for player in players],
        )

    @abstractmethod
    def correlated_value(self, player, *strategies):
        """Return the correlated value of ``player``."""

    def correlated_values(self, *strategies):
        """Return the correlated value of every player as an array."""
        players = range(self.player_count)

        return np.array(
            [self.correlated_value(player, *strategies) for player in players],
        )

    @abstractmethod
    def best_response(self, player, *opponent_strategies):
        """Return a pair whose second item is the best-response value."""

    def nash_gap(self, *strategies):
        """Sum, over players, of best-response value minus actual value."""
        total = 0

        for player, value in enumerate(self.values(*strategies)):
            # Everyone's strategy except the deviating player's own.
            others = (*strategies[:player], *strategies[player + 1:])
            _, deviation_value = self.best_response(player, *others)
            total += deviation_value - value

        return total

    def cce_gap(self, *strategies):
        """Sum, over players, of the gain from best responding to averages.

        Each entry of ``strategies`` is averaged along axis 0; every
        player's best-response value against the other players' averages
        is compared with that player's correlated value.
        """
        averages = [np.mean(strategy, axis=0) for strategy in strategies]
        total = 0

        for player, value in enumerate(self.correlated_values(*strategies)):
            others = [*averages[:player], *averages[player + 1:]]
            _, deviation_value = self.best_response(player, *others)
            total += deviation_value - value

        return total
92
+
93
+
94
@dataclass
class TwoPlayerGame(Game, ABC):
    """Two-player (2p) game.

    Row and column players are of indices 0 and 1, respectively. The
    player-indexed methods forward to their ``row_*``/``column_*``
    counterparts and raise ``ValueError`` for any other index.
    """

    def _verify(self, **kwargs):
        """Run the inherited checks, then require exactly two players."""
        super()._verify(**kwargs)

        if self.player_count != 2:
            raise ValueError('number of players not 2')

    @property
    @abstractmethod
    def row_utilities(self):
        """Return the utility matrix of the row player."""

    @property
    @abstractmethod
    def column_utilities(self):
        """Return the utility matrix of the column player."""

    def dimension(self, player):
        """Return the row count (player 0) or column count (player 1)."""
        if player == 0:
            return self.row_dimension

        if player == 1:
            return self.column_dimension

        raise ValueError(f'Player {player} does not exist')

    @property
    def row_dimension(self):
        """Return the number of rows of the row utility matrix."""
        row_count, _ = self.row_utilities.shape[:2]

        return row_count

    @property
    def column_dimension(self):
        """Return the number of columns of the row utility matrix."""
        _, column_count = self.row_utilities.shape[:2]

        return column_count

    def utility(self, player, opponent_strategy):
        """Return the utility vector of ``player`` against the opponent."""
        if player == 0:
            return self.row_utility(opponent_strategy)

        if player == 1:
            return self.column_utility(opponent_strategy)

        raise ValueError(f'Player {player} does not exist')

    def row_utility(self, column_strategy):
        """Return ``row_utilities @ column_strategy``."""
        matrix = self.row_utilities

        return matrix @ column_strategy

    def column_utility(self, row_strategy):
        """Return ``row_strategy @ column_utilities``."""
        matrix = self.column_utilities

        return row_strategy @ matrix

    def value(self, player, row_strategy, column_strategy):
        """Return the bilinear value of ``player`` for the strategy pair."""
        if player == 0:
            return self.row_value(row_strategy, column_strategy)

        if player == 1:
            return self.column_value(row_strategy, column_strategy)

        raise ValueError(f'Player {player} does not exist')

    def row_value(self, row_strategy, column_strategy):
        """Return ``row_strategy @ row_utilities @ column_strategy``."""
        return (row_strategy @ self.row_utilities) @ column_strategy

    def column_value(self, row_strategy, column_strategy):
        """Return ``row_strategy @ column_utilities @ column_strategy``."""
        return (row_strategy @ self.column_utilities) @ column_strategy

    def correlated_value(self, player, row_strategies, column_strategies):
        """Return the correlated value of ``player`` over paired strategies."""
        if player == 0:
            return self.correlated_row_value(
                row_strategies,
                column_strategies,
            )

        if player == 1:
            return self.correlated_column_value(
                row_strategies,
                column_strategies,
            )

        raise ValueError(f'Player {player} does not exist')

    def correlated_row_value(self, row_strategies, column_strategies):
        """Return the mean, over axis 0, of the row player's paired values."""
        # Element-wise product summed along axis 1 gives one value per row.
        weighted = (row_strategies @ self.row_utilities) * column_strategies

        return weighted.sum(1).mean()

    def correlated_column_value(self, row_strategies, column_strategies):
        """Return the mean, over axis 0, of the column player's values."""
        weighted = (row_strategies @ self.column_utilities) * column_strategies

        return weighted.sum(1).mean()

    def best_response(self, player, opponent_strategy):
        """Return the best response of ``player`` to the opponent."""
        if player == 0:
            return self.row_best_response(opponent_strategy)

        if player == 1:
            return self.column_best_response(opponent_strategy)

        raise ValueError(f'Player {player} does not exist')

    @abstractmethod
    def row_best_response(self, column_strategy):
        """Return the row player's best response to ``column_strategy``."""

    @abstractmethod
    def column_best_response(self, row_strategy):
        """Return the column player's best response to ``row_strategy``."""
215
+
216
+
217
@dataclass
class TwoPlayerZeroSumGame(TwoPlayerGame, ABC):
    """Two-player zero-sum (2p0s) game.

    The column player's utilities are the negation of the row player's.
    """

    @property
    def column_utilities(self):
        """Return the negated row utilities."""
        row_utilities = self.row_utilities

        return -row_utilities

    def values(self, row_strategy, column_strategy):
        """Return ``(v, -v)`` where ``v`` is the row player's value."""
        row_value = self.row_value(row_strategy, column_strategy)
        pair = (row_value, -row_value)

        return np.array(pair)

    def correlated_values(self, row_strategies, column_strategies):
        """Return ``(v, -v)`` where ``v`` is the correlated row value."""
        row_value = self.correlated_row_value(
            row_strategies,
            column_strategies,
        )
        pair = (row_value, -row_value)

        return np.array(pair)

    def nash_gap(self, row_strategy, column_strategy):
        """Return the sum of both players' best-response values.

        The players' actual values cancel because they are negations of
        each other, so only the best-response values remain.
        """
        _, row_gain = self.row_best_response(column_strategy)
        _, column_gain = self.column_best_response(row_strategy)

        return row_gain + column_gain

    def exploitability(self, row_strategy, column_strategy):
        """Return half of the Nash gap."""
        gap = self.nash_gap(row_strategy, column_strategy)

        return gap / 2
243
+
244
+
245
@dataclass
class NormalFormGame(Serializable, Game):
    """Normal-form game.

    Each player optimizes over the probability simplex.
    """

    # One collection of actions per player.
    actions: Any
    # Array indexed by one action position per player, then by player.
    utilities: Any
    # Per player, a mapping from action to its position; filled in
    # ``__post_init__``.
    indices: Any = field(init=False, default_factory=list)

    @classmethod
    def deserialize(cls, raw_data):
        """Build a game from a mapping with 'actions' and 'utilities'."""
        utilities = np.array(raw_data['utilities'])

        return cls(raw_data['actions'], utilities)

    def __post_init__(self):
        super().__post_init__()

        # Number each player's actions in their given order.
        self.indices.extend(
            dict(zip(player_actions, count()))
            for player_actions in self.actions
        )

    def _verify(self, *, utilities_shape=None, **kwargs):
        """Check that ``utilities`` has the expected shape.

        When ``utilities_shape`` is not given, the expected shape is the
        number of actions of each player followed by the player count.
        """
        super()._verify(**kwargs)

        expected_shape = utilities_shape

        if expected_shape is None:
            action_counts = tuple(len(a) for a in self.actions)
            expected_shape = action_counts + (self.player_count,)

        if self.utilities.shape != expected_shape:
            raise ValueError('utilities do not match actions and players')

    @property
    def player_count(self):
        """Return the number of action collections."""
        return len(self.actions)

    def utility(self, player, *opponent_strategies):
        """Not implemented for the general case."""
        raise NotImplementedError

    def value(self, player, *strategies):
        """Not implemented for the general case."""
        raise NotImplementedError

    def correlated_value(self, player, *strategies):
        """Not implemented for the general case."""
        raise NotImplementedError

    def best_response(self, player, *opponent_strategies):
        """Not implemented for the general case."""
        raise NotImplementedError

    def serialize(self):
        """Return a mapping with 'actions' and list-converted 'utilities'."""
        raw_utilities = self.utilities.tolist()

        return {'actions': self.actions, 'utilities': raw_utilities}
293
+
294
+
295
@dataclass
class TwoPlayerNormalFormGame(TwoPlayerGame, NormalFormGame):
    """Two-player (2p) normal-form game.

    The last axis of ``utilities`` selects the player: index 0 for the
    row player and index 1 for the column player.
    """

    @property
    def row_actions(self):
        """Return the actions of the row player."""
        row, _ = self.actions[0], None

        return row

    @property
    def column_actions(self):
        """Return the actions of the column player."""
        column, _ = self.actions[1], None

        return column

    @property
    def row_indices(self):
        """Return the action-to-position mapping of the row player."""
        mapping = self.indices[0]

        return mapping

    @property
    def column_indices(self):
        """Return the action-to-position mapping of the column player."""
        mapping = self.indices[1]

        return mapping

    @property
    def row_utilities(self):
        """Return the row player's slice of ``utilities``."""
        return self.utilities[:, :, 0]

    @property
    def column_utilities(self):
        """Return the column player's slice of ``utilities``."""
        return self.utilities[:, :, 1]

    def row_best_response(self, column_strategy):
        """Return a one-hot row strategy and the utility it attains."""
        pure_strategy = np.zeros(len(self.row_actions))
        payoffs = self.row_utility(column_strategy)
        best = payoffs.argmax()
        # Put all probability mass on a utility-maximizing action.
        pure_strategy[best] = 1

        return pure_strategy, payoffs[best]

    def column_best_response(self, row_strategy):
        """Return a one-hot column strategy and the utility it attains."""
        pure_strategy = np.zeros(len(self.column_actions))
        payoffs = self.column_utility(row_strategy)
        best = payoffs.argmax()
        pure_strategy[best] = 1

        return pure_strategy, payoffs[best]
338
+
339
+
340
@dataclass
class TwoPlayerZeroSumNormalFormGame(
        TwoPlayerZeroSumGame,
        TwoPlayerNormalFormGame,
):
    """Two-player zero-sum (2p0s) normal-form game.

    The utility matrix is from the viewpoint of the row player.
    """

    def _verify(self, **kwargs):
        """Verify with a 2-D expected shape (rows by columns)."""
        # A single matrix is stored, so there is no trailing player axis.
        matrix_shape = len(self.row_actions), len(self.column_actions)

        super()._verify(**kwargs, utilities_shape=matrix_shape)

    @property
    def row_utilities(self):
        """Return ``utilities`` as is; it already is the row matrix."""
        matrix = self.utilities

        return matrix
359
+
360
+
361
@dataclass
class ExtensiveFormGame(Serializable, Game):
    """Extensive-form game (EFG).

    Each player optimizes over the sequence-form polytope.
    """

    # One tree-form sequential decision process per player.
    tree_form_sequential_decision_processes: Any
    utilities: Any

    @classmethod
    def deserialize(cls, raw_data):
        """Not implemented for the general case."""
        raise NotImplementedError

    @property
    def player_count(self):
        """Return the number of decision processes."""
        processes = self.tree_form_sequential_decision_processes

        return len(processes)

    def utility(self, player, *opponent_strategies):
        """Not implemented for the general case."""
        raise NotImplementedError

    def value(self, player, *strategies):
        """Not implemented for the general case."""
        raise NotImplementedError

    def correlated_value(self, player, *strategies):
        """Not implemented for the general case."""
        raise NotImplementedError

    def best_response(self, player, *opponent_strategies):
        """Not implemented for the general case."""
        raise NotImplementedError

    def serialize(self):
        """Not implemented for the general case."""
        raise NotImplementedError
393
+
394
+
395
@dataclass
class TwoPlayerExtensiveFormGame(TwoPlayerGame, ExtensiveFormGame):
    """Two-player (2p) extensive-form game (EFG).

    ``utilities`` holds two matrices, the row player's then the column
    player's, each indexed by (row sequence, column sequence) position.
    """

    @classmethod
    def deserialize(cls, raw_data):
        """Build a game from its raw (serialized) form.

        ``raw_data['utilities']`` is a collection of entries, each with a
        'sequences' item (one sequence per player) and a 'values' item
        (one value per player). Unlisted sequence pairs stay zero.

        Raises ``ValueError`` if an entry does not carry exactly two
        values, or if its number of sequences differs from the number of
        decision processes.
        """
        tfsdps = TreeFormSequentialDecisionProcess.deserialize_all(
            raw_data['tree_form_sequential_decision_processes'],
        )
        shape = tuple(len(tfsdp.sequences) for tfsdp in tfsdps)
        row_utilities = lil_array(shape)
        column_utilities = lil_array(shape)

        for raw_utility in raw_data['utilities']:
            if len(raw_utility['values']) != 2:
                raise ValueError('utility is not of a 2-player game')

            # ``strict=True`` turns a sequence-count mismatch into an
            # error; a plain ``zip`` would silently truncate and yield a
            # short index tuple.
            indices = tuple(
                # Sequences are converted to tuples before the lookup
                # (presumably ``indices`` is keyed by tuples; raw data
                # may contain lists).
                tfsdp.indices[tuple(sequence)]
                for tfsdp, sequence in zip(
                    tfsdps,
                    raw_utility['sequences'],
                    strict=True,
                )
            )
            row_utilities[indices] = raw_utility['values'][0]
            column_utilities[indices] = raw_utility['values'][1]

        return cls(tfsdps, [row_utilities.tocsr(), column_utilities.tocsr()])

    def _verify(self, **kwargs):
        """Check that both matrices are (row sequences x column sequences)."""
        super()._verify(**kwargs)

        if not (
                self.row_utilities.shape
                == self.column_utilities.shape
                == (len(self.row_sequences), len(self.column_sequences))
        ):
            raise ValueError('utilities do not match sequences')

    @property
    def row_tree_form_sequential_decision_process(self):
        """Return the row player's decision process."""
        return self.tree_form_sequential_decision_processes[0]

    @property
    def column_tree_form_sequential_decision_process(self):
        """Return the column player's decision process."""
        return self.tree_form_sequential_decision_processes[1]

    @property
    def row_sequences(self):
        """Return the sequences of the row player's decision process."""
        return self.row_tree_form_sequential_decision_process.sequences

    @property
    def column_sequences(self):
        """Return the sequences of the column player's decision process."""
        return self.column_tree_form_sequential_decision_process.sequences

    @property
    def row_indices(self):
        """Return the ``indices`` of the row player's decision process."""
        return self.row_tree_form_sequential_decision_process.indices

    @property
    def column_indices(self):
        """Return the ``indices`` of the column player's decision process."""
        return self.column_tree_form_sequential_decision_process.indices

    @property
    def row_utilities(self):
        """Return the row player's utility matrix."""
        return self.utilities[0]

    @property
    def column_utilities(self):
        """Return the column player's utility matrix."""
        return self.utilities[1]

    def row_best_response(self, column_strategy):
        """Return the row process's sequence-form best response."""
        best_response = (
            self
            .row_tree_form_sequential_decision_process
            .sequence_form_best_response(self.row_utility(column_strategy))
        )

        return best_response

    def column_best_response(self, row_strategy):
        """Return the column process's sequence-form best response."""
        best_response = (
            self
            .column_tree_form_sequential_decision_process
            .sequence_form_best_response(self.column_utility(row_strategy))
        )

        return best_response

    def serialize(self):
        """Return the raw form accepted by ``deserialize``.

        Only sequence pairs where at least one player's utility is
        nonzero are written out.
        """
        tfsdps = self.tree_form_sequential_decision_processes
        raw_tfsdps = [tfsdp.to_list() for tfsdp in tfsdps]
        raw_utilities = []
        # Nonzero wherever either matrix is nonzero; absolute values keep
        # opposite-signed entries from cancelling out.
        abs_utility_sums = abs(self.row_utilities) + abs(self.column_utilities)

        for indices in zip(*abs_utility_sums.nonzero()):
            sequences = [
                tfsdp.sequences[index]
                for tfsdp, index in zip(tfsdps, indices)
            ]
            row_value = self.row_utilities[indices].item()
            column_value = self.column_utilities[indices].item()
            values = row_value, column_value

            raw_utilities.append({'sequences': sequences, 'values': values})

        return {
            'tree_form_sequential_decision_processes': raw_tfsdps,
            'utilities': raw_utilities,
        }
507
+
508
+
509
@dataclass
class TwoPlayerZeroSumExtensiveFormGame(
        TwoPlayerZeroSumGame,
        TwoPlayerExtensiveFormGame,
):
    """Two-player zero-sum (2p0s) extensive-form game (EFG).

    The utility matrix is from the viewpoint of the row player.
    """

    @classmethod
    def deserialize(cls, raw_data):
        """Build a game from its raw (serialized) form.

        ``raw_data['utilities']`` is a collection of entries, each with a
        'sequences' item (one sequence per player) and a single 'value'
        for the row player. Unlisted sequence pairs stay zero.

        Raises ``ValueError`` if an entry's number of sequences differs
        from the number of decision processes.
        """
        tfsdps = TreeFormSequentialDecisionProcess.deserialize_all(
            raw_data['tree_form_sequential_decision_processes'],
        )
        shape = tuple(len(tfsdp.sequences) for tfsdp in tfsdps)
        utilities = lil_array(shape)

        for raw_utility in raw_data['utilities']:
            # ``strict=True`` turns a sequence-count mismatch into an
            # error; a plain ``zip`` would silently truncate and yield a
            # short index tuple.
            indices = tuple(
                # Sequences are converted to tuples before the lookup
                # (presumably ``indices`` is keyed by tuples; raw data
                # may contain lists).
                tfsdp.indices[tuple(sequence)]
                for tfsdp, sequence in zip(
                    tfsdps,
                    raw_utility['sequences'],
                    strict=True,
                )
            )
            utilities[indices] = raw_utility['value']

        return cls(tfsdps, utilities.tocsr())

    @property
    def row_utilities(self):
        """Return ``utilities`` as is; it already is the row matrix."""
        return self.utilities

    def serialize(self):
        """Return the raw form accepted by ``deserialize``.

        Only sequence pairs with a nonzero utility are written out.
        """
        tfsdps = self.tree_form_sequential_decision_processes
        raw_tfsdps = [tfsdp.to_list() for tfsdp in tfsdps]
        raw_utilities = []

        for indices in zip(*self.utilities.nonzero()):
            sequences = [
                tfsdp.sequences[index]
                for tfsdp, index in zip(tfsdps, indices)
            ]
            value = self.utilities[indices].item()

            raw_utilities.append({'sequences': sequences, 'value': value})

        return {
            'tree_form_sequential_decision_processes': raw_tfsdps,
            'utilities': raw_utilities,
        }
563
+
564
+
565
@dataclass
class SymmetrizedGame(Game):
    """Symmetrized game.

    Each player optimizes over the cartesian product of probability
    simplices.

    A strategy here is one flat vector that ``split`` divides according
    to the wrapped game's ``dimensions`` (presumably one piece per role
    of the wrapped game -- confirm against ``noregret.utilities.split``).
    Utilities and values are averaged over every assignment of players
    to roles.
    """

    # The wrapped game being symmetrized.
    game: Any

    @property
    def player_count(self):
        """Return the wrapped game's player count."""
        return self.game.player_count

    def dimension(self, player):
        """Return the sum of the wrapped game's dimensions.

        The result does not depend on ``player``.
        """
        return sum(self.game.dimensions)

    def utility(self, player, *opponent_strategies):
        """Return the utility vector of ``player`` against the opponents.

        For each permutation (role ``i`` is played by player
        ``permutation[i]``), the wrapped game's utility for the role
        taken by ``player`` is accumulated. The per-role totals are then
        concatenated and divided by the number of permutations.
        """
        # Hoisted: the property rebuilds its array on every access.
        dimensions = self.game.dimensions
        strategies = [
            split(opponent_strategy, dimensions)
            for opponent_strategy in opponent_strategies
        ]
        # Placeholder so list positions line up with player indices.
        strategies.insert(player, None)

        utilities = [0] * self.player_count

        for permutation in permutations(range(self.player_count)):
            role = permutation.index(player)
            # Out-of-place addition: an in-place ``+=`` on an integer
            # accumulator fails if a later term has a float dtype.
            utilities[role] = utilities[role] + self.game.utility(
                role,
                *(
                    strategies[permutation[i]][i]
                    for i in range(self.player_count)
                    if permutation[i] != player
                ),
            )

        # Out-of-place division: ``/=`` raises a casting error when the
        # concatenated array has an integer dtype.
        return np.concatenate(utilities) / factorial(self.player_count)

    def value(self, player, *strategies):
        """Return the value of ``player``, averaged over permutations."""
        dimensions = self.game.dimensions
        pieces = list(strategies)

        for i in range(self.player_count):
            pieces[i] = split(pieces[i], dimensions)

        value = 0

        for permutation in permutations(range(self.player_count)):
            value = value + self.game.value(
                permutation.index(player),
                *(
                    pieces[permutation[i]][i]
                    for i in range(self.player_count)
                ),
            )

        return value / factorial(self.player_count)

    def correlated_value(self, player, *strategies):
        """Not implemented."""
        raise NotImplementedError

    def best_response(self, player, *opponent_strategies):
        """Not implemented."""
        raise NotImplementedError