noregret 0.0.0.dev1__tar.gz → 0.0.0.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/PKG-INFO +1 -1
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret/games.py +178 -5
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret/regret_minimizers.py +165 -0
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret/utilities.py +6 -1
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret.egg-info/PKG-INFO +1 -1
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/setup.py +1 -1
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/LICENSE +0 -0
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/README.md +0 -0
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret/__init__.py +0 -0
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret.egg-info/SOURCES.txt +0 -0
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret.egg-info/requires.txt +0 -0
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev1 → noregret-0.0.0.dev3}/setup.cfg +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
+
from collections import defaultdict
|
|
2
3
|
from dataclasses import dataclass
|
|
3
|
-
from functools import partial
|
|
4
|
+
from functools import cache, partial
|
|
4
5
|
from itertools import permutations
|
|
5
6
|
from math import factorial
|
|
6
7
|
from typing import Any
|
|
@@ -73,8 +74,11 @@ class Game(ABC):
|
|
|
73
74
|
|
|
74
75
|
for i, value in enumerate(self.values(*strategies)):
|
|
75
76
|
opponent_strategies = strategies[:i] + strategies[i + 1:]
|
|
76
|
-
_,
|
|
77
|
-
|
|
77
|
+
_, best_response_value = self.best_response(
|
|
78
|
+
i,
|
|
79
|
+
*opponent_strategies,
|
|
80
|
+
)
|
|
81
|
+
gap += best_response_value - value
|
|
78
82
|
|
|
79
83
|
return gap
|
|
80
84
|
|
|
@@ -86,8 +90,11 @@ class Game(ABC):
|
|
|
86
90
|
average_opponent_strategies = (
|
|
87
91
|
average_strategies[:i] + average_strategies[i + 1:]
|
|
88
92
|
)
|
|
89
|
-
_,
|
|
90
|
-
|
|
93
|
+
_, best_response_value = self.best_response(
|
|
94
|
+
i,
|
|
95
|
+
*average_opponent_strategies,
|
|
96
|
+
)
|
|
97
|
+
gap += best_response_value - value
|
|
91
98
|
|
|
92
99
|
return gap
|
|
93
100
|
|
|
@@ -613,3 +620,169 @@ class SymmetrizedGame(Game):
|
|
|
613
620
|
|
|
614
621
|
def best_response(self, player, *opponent_strategies):
|
|
615
622
|
raise NotImplementedError
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
class ExtensiveFormGame2(ABC):
|
|
626
|
+
"""Extensive-form game (EFG)."""
|
|
627
|
+
|
|
628
|
+
@dataclass(frozen=True)
|
|
629
|
+
class State:
|
|
630
|
+
"""State of an extensive-form game."""
|
|
631
|
+
|
|
632
|
+
@property
|
|
633
|
+
@abstractmethod
|
|
634
|
+
def utilities(self):
|
|
635
|
+
pass
|
|
636
|
+
|
|
637
|
+
@property
|
|
638
|
+
@abstractmethod
|
|
639
|
+
def chance_action_probabilities(self):
|
|
640
|
+
pass
|
|
641
|
+
|
|
642
|
+
@property
|
|
643
|
+
@abstractmethod
|
|
644
|
+
def actions(self):
|
|
645
|
+
pass
|
|
646
|
+
|
|
647
|
+
@property
|
|
648
|
+
@abstractmethod
|
|
649
|
+
def infoset(self):
|
|
650
|
+
pass
|
|
651
|
+
|
|
652
|
+
@property
|
|
653
|
+
@abstractmethod
|
|
654
|
+
def player(self):
|
|
655
|
+
pass
|
|
656
|
+
|
|
657
|
+
@abstractmethod
|
|
658
|
+
def is_terminal(self):
|
|
659
|
+
pass
|
|
660
|
+
|
|
661
|
+
@abstractmethod
|
|
662
|
+
def is_chance(self):
|
|
663
|
+
pass
|
|
664
|
+
|
|
665
|
+
@abstractmethod
|
|
666
|
+
def utility(self, player):
|
|
667
|
+
pass
|
|
668
|
+
|
|
669
|
+
@abstractmethod
|
|
670
|
+
def apply(self, action):
|
|
671
|
+
pass
|
|
672
|
+
|
|
673
|
+
@property
|
|
674
|
+
@abstractmethod
|
|
675
|
+
def players(self):
|
|
676
|
+
pass
|
|
677
|
+
|
|
678
|
+
@property
|
|
679
|
+
@abstractmethod
|
|
680
|
+
def initial_state(self):
|
|
681
|
+
pass
|
|
682
|
+
|
|
683
|
+
def values(self, strategy_profile, state=None):
|
|
684
|
+
if state is None:
|
|
685
|
+
values = self.values(strategy_profile, self.initial_state)
|
|
686
|
+
elif state.is_terminal():
|
|
687
|
+
values = state.utilities
|
|
688
|
+
else:
|
|
689
|
+
if state.is_chance():
|
|
690
|
+
actions, probabilities = zip(
|
|
691
|
+
*state.chance_action_probabilities,
|
|
692
|
+
)
|
|
693
|
+
else:
|
|
694
|
+
actions = state.actions
|
|
695
|
+
probabilities = strategy_profile(state)
|
|
696
|
+
|
|
697
|
+
values = 0
|
|
698
|
+
|
|
699
|
+
for action, probability in zip(actions, probabilities):
|
|
700
|
+
values += (
|
|
701
|
+
probability
|
|
702
|
+
* self.values(strategy_profile, state.apply(action))
|
|
703
|
+
)
|
|
704
|
+
|
|
705
|
+
return values
|
|
706
|
+
|
|
707
|
+
def best_response_value(self, player, strategy_profile):
|
|
708
|
+
states = defaultdict(list)
|
|
709
|
+
counterfactual_reach_probabilities = {}
|
|
710
|
+
|
|
711
|
+
def dfs(state, counterfactual_reach_probability):
|
|
712
|
+
counterfactual_reach_probabilities[state] = (
|
|
713
|
+
counterfactual_reach_probability
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
if state.is_terminal():
|
|
717
|
+
return
|
|
718
|
+
|
|
719
|
+
if not state.is_chance():
|
|
720
|
+
states[state.infoset].append(state)
|
|
721
|
+
|
|
722
|
+
if state.is_chance() or state.player != player:
|
|
723
|
+
if state.is_chance():
|
|
724
|
+
actions, probabilities = zip(
|
|
725
|
+
*state.chance_action_probabilities,
|
|
726
|
+
)
|
|
727
|
+
else:
|
|
728
|
+
actions = state.actions
|
|
729
|
+
probabilities = strategy_profile(state)
|
|
730
|
+
|
|
731
|
+
for action, probability in zip(actions, probabilities):
|
|
732
|
+
dfs(
|
|
733
|
+
state.apply(action),
|
|
734
|
+
probability * counterfactual_reach_probability,
|
|
735
|
+
)
|
|
736
|
+
else:
|
|
737
|
+
for action in state.actions:
|
|
738
|
+
dfs(state.apply(action), counterfactual_reach_probability)
|
|
739
|
+
|
|
740
|
+
dfs(self.initial_state, 1)
|
|
741
|
+
|
|
742
|
+
@cache
|
|
743
|
+
def solve(state):
|
|
744
|
+
if state.is_terminal():
|
|
745
|
+
value = state.utility(player)
|
|
746
|
+
elif state.is_chance() or state.player != player:
|
|
747
|
+
if state.is_chance():
|
|
748
|
+
actions, probabilities = zip(
|
|
749
|
+
*state.chance_action_probabilities,
|
|
750
|
+
)
|
|
751
|
+
else:
|
|
752
|
+
actions = state.actions
|
|
753
|
+
probabilities = strategy_profile(state)
|
|
754
|
+
|
|
755
|
+
value = 0
|
|
756
|
+
|
|
757
|
+
for action, probability in zip(actions, probabilities):
|
|
758
|
+
value += probability * solve(state.apply(action))
|
|
759
|
+
else:
|
|
760
|
+
value = solve2(state.infoset)
|
|
761
|
+
|
|
762
|
+
return value
|
|
763
|
+
|
|
764
|
+
@cache
|
|
765
|
+
def solve2(infoset):
|
|
766
|
+
values = defaultdict(int)
|
|
767
|
+
|
|
768
|
+
for state in states[infoset]:
|
|
769
|
+
weight = counterfactual_reach_probabilities[state]
|
|
770
|
+
|
|
771
|
+
for i, action in enumerate(state.actions):
|
|
772
|
+
values[i] += weight * solve(state.apply(action))
|
|
773
|
+
|
|
774
|
+
return max(values.values())
|
|
775
|
+
|
|
776
|
+
return solve(self.initial_state)
|
|
777
|
+
|
|
778
|
+
def nash_gap(self, strategy_profile):
|
|
779
|
+
gap = 0
|
|
780
|
+
|
|
781
|
+
for player, value in zip(self.players, self.values(strategy_profile)):
|
|
782
|
+
best_response_value = self.best_response_value(
|
|
783
|
+
player,
|
|
784
|
+
strategy_profile,
|
|
785
|
+
)
|
|
786
|
+
gap += best_response_value - value
|
|
787
|
+
|
|
788
|
+
return gap
|
|
@@ -10,6 +10,7 @@ import numpy as np
|
|
|
10
10
|
|
|
11
11
|
from noregret.utilities import (
|
|
12
12
|
euclidean_projection_on_probability_simplex,
|
|
13
|
+
sample,
|
|
13
14
|
split,
|
|
14
15
|
stationary_distribution,
|
|
15
16
|
)
|
|
@@ -52,6 +53,9 @@ class RegretMinimizer(ABC):
|
|
|
52
53
|
def next_strategy(self, prediction=False):
|
|
53
54
|
pass
|
|
54
55
|
|
|
56
|
+
def undo_next_strategy(self):
|
|
57
|
+
self.strategies.pop()
|
|
58
|
+
|
|
55
59
|
def observe_utility(self, utility):
|
|
56
60
|
if len(self.strategies) == len(self.utilities):
|
|
57
61
|
raise ValueError('next strategy not yet outputted')
|
|
@@ -390,6 +394,9 @@ class BlumMansour(ProbabilitySimplexSwapRegretMinimizer):
|
|
|
390
394
|
|
|
391
395
|
return strategy
|
|
392
396
|
|
|
397
|
+
def undo_next_strategy(self):
|
|
398
|
+
raise NotImplementedError
|
|
399
|
+
|
|
393
400
|
def observe_utility(self, utility):
|
|
394
401
|
super().observe_utility(utility)
|
|
395
402
|
|
|
@@ -471,6 +478,9 @@ class CounterfactualRegretMinimization(SequenceFormPolytopeRegretMinimizer):
|
|
|
471
478
|
|
|
472
479
|
return strategy
|
|
473
480
|
|
|
481
|
+
def undo_next_strategy(self):
|
|
482
|
+
raise NotImplementedError
|
|
483
|
+
|
|
474
484
|
def observe_utility(self, utility):
|
|
475
485
|
super().observe_utility(utility)
|
|
476
486
|
|
|
@@ -568,6 +578,9 @@ class CartesianProductRegretCircuit(RegretCircuit):
|
|
|
568
578
|
|
|
569
579
|
return strategy
|
|
570
580
|
|
|
581
|
+
def undo_next_strategy(self):
|
|
582
|
+
raise NotImplementedError
|
|
583
|
+
|
|
571
584
|
def observe_utility(self, utility):
|
|
572
585
|
super().observe_utility(utility)
|
|
573
586
|
|
|
@@ -634,6 +647,9 @@ class ConvexHullRegretCircuit(RegretCircuit):
|
|
|
634
647
|
|
|
635
648
|
return strategy
|
|
636
649
|
|
|
650
|
+
def undo_next_strategy(self):
|
|
651
|
+
raise NotImplementedError
|
|
652
|
+
|
|
637
653
|
def observe_utility(self, utility):
|
|
638
654
|
super().observe_utility(utility)
|
|
639
655
|
|
|
@@ -643,3 +659,152 @@ class ConvexHullRegretCircuit(RegretCircuit):
|
|
|
643
659
|
self.previous_outputs = self.outputs.copy()
|
|
644
660
|
|
|
645
661
|
self.mixing_regret_minimizer.observe_utility(self.outputs @ utility)
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
@dataclass
|
|
665
|
+
class StochasticRegretMinimization(ABC):
|
|
666
|
+
"""Stochastic regret minimization."""
|
|
667
|
+
|
|
668
|
+
extensive_form_game: Any
|
|
669
|
+
|
|
670
|
+
@property
|
|
671
|
+
def average_strategy_profile(self):
|
|
672
|
+
return lambda state: (
|
|
673
|
+
self._local_regret_minimizer(state).average_strategy
|
|
674
|
+
)
|
|
675
|
+
|
|
676
|
+
@abstractmethod
|
|
677
|
+
def _local_regret_minimizer(self, state):
|
|
678
|
+
pass
|
|
679
|
+
|
|
680
|
+
def external_sampling(self):
|
|
681
|
+
for player in self.extensive_form_game.players:
|
|
682
|
+
self._external_sampling(
|
|
683
|
+
player,
|
|
684
|
+
self.extensive_form_game.initial_state,
|
|
685
|
+
)
|
|
686
|
+
|
|
687
|
+
def _external_sampling(self, player, state):
|
|
688
|
+
if state.is_terminal():
|
|
689
|
+
utility = state.utility(player)
|
|
690
|
+
elif state.is_chance():
|
|
691
|
+
actions, probabilities = zip(*state.chance_action_probabilities)
|
|
692
|
+
action = sample(actions, probabilities)
|
|
693
|
+
utility = self._external_sampling(player, state.apply(action))
|
|
694
|
+
else:
|
|
695
|
+
local_regret_minimizer = self._local_regret_minimizer(state)
|
|
696
|
+
actions = state.actions
|
|
697
|
+
probabilities = local_regret_minimizer.next_strategy()
|
|
698
|
+
|
|
699
|
+
if state.player == player:
|
|
700
|
+
utilities = list(
|
|
701
|
+
map(
|
|
702
|
+
partial(self._external_sampling, player),
|
|
703
|
+
map(state.apply, actions),
|
|
704
|
+
),
|
|
705
|
+
)
|
|
706
|
+
utility = utilities @ probabilities
|
|
707
|
+
|
|
708
|
+
local_regret_minimizer.observe_utility(utilities)
|
|
709
|
+
else:
|
|
710
|
+
action = sample(actions, probabilities)
|
|
711
|
+
utility = self._external_sampling(player, state.apply(action))
|
|
712
|
+
|
|
713
|
+
local_regret_minimizer.undo_next_strategy()
|
|
714
|
+
|
|
715
|
+
return utility
|
|
716
|
+
|
|
717
|
+
def outcome_sampling(self, reference_strategy_profile):
|
|
718
|
+
for player in self.extensive_form_game.players:
|
|
719
|
+
self._outcome_sampling(
|
|
720
|
+
reference_strategy_profile,
|
|
721
|
+
player,
|
|
722
|
+
self.extensive_form_game.initial_state,
|
|
723
|
+
1,
|
|
724
|
+
)
|
|
725
|
+
|
|
726
|
+
def _outcome_sampling(
|
|
727
|
+
self,
|
|
728
|
+
reference_strategy_profile,
|
|
729
|
+
player,
|
|
730
|
+
state,
|
|
731
|
+
reference_reach_probability,
|
|
732
|
+
):
|
|
733
|
+
if state.is_terminal():
|
|
734
|
+
utility = state.utility(player) / reference_reach_probability
|
|
735
|
+
elif state.is_chance():
|
|
736
|
+
actions, probabilities = zip(*state.chance_action_probabilities)
|
|
737
|
+
action = sample(actions, probabilities)
|
|
738
|
+
utility = self._outcome_sampling(
|
|
739
|
+
reference_strategy_profile,
|
|
740
|
+
player,
|
|
741
|
+
state.apply(action),
|
|
742
|
+
reference_reach_probability,
|
|
743
|
+
)
|
|
744
|
+
else:
|
|
745
|
+
local_regret_minimizer = self._local_regret_minimizer(state)
|
|
746
|
+
actions = state.actions
|
|
747
|
+
|
|
748
|
+
if state.player == player:
|
|
749
|
+
probabilities = reference_strategy_profile(state)
|
|
750
|
+
index = sample(range(len(actions)), probabilities)
|
|
751
|
+
action = actions[index]
|
|
752
|
+
probability = probabilities[index]
|
|
753
|
+
utility = (
|
|
754
|
+
probability
|
|
755
|
+
* self._outcome_sampling(
|
|
756
|
+
reference_strategy_profile,
|
|
757
|
+
player,
|
|
758
|
+
state.apply(action),
|
|
759
|
+
probability * reference_reach_probability,
|
|
760
|
+
)
|
|
761
|
+
)
|
|
762
|
+
utilities = np.zeros(len(actions))
|
|
763
|
+
utilities[index] = utility
|
|
764
|
+
|
|
765
|
+
local_regret_minimizer.next_strategy()
|
|
766
|
+
local_regret_minimizer.observe_utility(utilities)
|
|
767
|
+
else:
|
|
768
|
+
probabilities = local_regret_minimizer.next_strategy()
|
|
769
|
+
action = sample(actions, probabilities)
|
|
770
|
+
utility = self._outcome_sampling(
|
|
771
|
+
reference_strategy_profile,
|
|
772
|
+
player,
|
|
773
|
+
state.apply(action),
|
|
774
|
+
reference_reach_probability,
|
|
775
|
+
)
|
|
776
|
+
|
|
777
|
+
local_regret_minimizer.undo_next_strategy()
|
|
778
|
+
|
|
779
|
+
return utility
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
@dataclass
|
|
783
|
+
class MonteCarloCounterfactualRegretMinimization(StochasticRegretMinimization):
|
|
784
|
+
"""Monte Carlo Counterfactual regret minimization (MCCFR)."""
|
|
785
|
+
|
|
786
|
+
regret_minimizer_factory: Any = partial(
|
|
787
|
+
RegretMatching,
|
|
788
|
+
is_time_symmetric=True,
|
|
789
|
+
)
|
|
790
|
+
_: KW_ONLY
|
|
791
|
+
local_regret_minimizers: Any = field(init=False, default_factory=dict)
|
|
792
|
+
|
|
793
|
+
@property
|
|
794
|
+
def iteration_count(self):
|
|
795
|
+
iteration_count = 0
|
|
796
|
+
|
|
797
|
+
for R in self.local_regret_minimizers.values():
|
|
798
|
+
iteration_count += R.iteration_count
|
|
799
|
+
|
|
800
|
+
return iteration_count
|
|
801
|
+
|
|
802
|
+
def _local_regret_minimizer(self, state):
|
|
803
|
+
if state.infoset in self.local_regret_minimizers:
|
|
804
|
+
R = self.local_regret_minimizers[state.infoset]
|
|
805
|
+
else:
|
|
806
|
+
action_count = len(state.actions)
|
|
807
|
+
R = self.regret_minimizer_factory(action_count)
|
|
808
|
+
self.local_regret_minimizers[state.infoset] = R
|
|
809
|
+
|
|
810
|
+
return R
|
|
@@ -6,6 +6,7 @@ from functools import partial
|
|
|
6
6
|
from importlib import import_module
|
|
7
7
|
from json import dump, dumps, load, loads
|
|
8
8
|
from math import inf
|
|
9
|
+
from random import choices
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
11
12
|
from ordered_set import OrderedSet
|
|
@@ -47,7 +48,7 @@ def stationary_distribution(stochastic_matrix):
|
|
|
47
48
|
P = stochastic_matrix
|
|
48
49
|
|
|
49
50
|
if not np.allclose(P.sum(1), 1):
|
|
50
|
-
raise ValueError('matrix not stochastic')
|
|
51
|
+
raise ValueError('matrix not left stochastic')
|
|
51
52
|
|
|
52
53
|
eigenvalues, eigenvectors = LA.eig(P.T)
|
|
53
54
|
pi = eigenvectors[:, np.isclose(eigenvalues, 1)][:, 0]
|
|
@@ -96,6 +97,10 @@ def split(values, counts):
|
|
|
96
97
|
return splits
|
|
97
98
|
|
|
98
99
|
|
|
100
|
+
def sample(values, probabilities):
|
|
101
|
+
return choices(values, probabilities)[0]
|
|
102
|
+
|
|
103
|
+
|
|
99
104
|
class Serializable(ABC):
|
|
100
105
|
@classmethod
|
|
101
106
|
@abstractmethod
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|