noregret 0.0.0.dev0__tar.gz → 0.0.0.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/PKG-INFO +1 -1
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret/games.py +184 -28
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret/regret_minimizers.py +166 -1
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret/utilities.py +12 -14
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret.egg-info/PKG-INFO +1 -1
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/setup.py +1 -1
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/LICENSE +0 -0
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/README.md +0 -0
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret/__init__.py +0 -0
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret.egg-info/SOURCES.txt +0 -0
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret.egg-info/dependency_links.txt +0 -0
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret.egg-info/requires.txt +0 -0
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/noregret.egg-info/top_level.txt +0 -0
- {noregret-0.0.0.dev0 → noregret-0.0.0.dev2}/setup.cfg +0 -0
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from
|
|
3
|
-
from
|
|
4
|
-
from
|
|
2
|
+
from collections import defaultdict
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from functools import cache, partial
|
|
5
|
+
from itertools import permutations
|
|
5
6
|
from math import factorial
|
|
6
7
|
from typing import Any
|
|
7
8
|
|
|
9
|
+
from ordered_set import OrderedSet
|
|
8
10
|
from scipy.sparse import lil_array
|
|
9
11
|
import numpy as np
|
|
10
12
|
|
|
@@ -72,8 +74,11 @@ class Game(ABC):
|
|
|
72
74
|
|
|
73
75
|
for i, value in enumerate(self.values(*strategies)):
|
|
74
76
|
opponent_strategies = strategies[:i] + strategies[i + 1:]
|
|
75
|
-
_,
|
|
76
|
-
|
|
77
|
+
_, best_response_value = self.best_response(
|
|
78
|
+
i,
|
|
79
|
+
*opponent_strategies,
|
|
80
|
+
)
|
|
81
|
+
gap += best_response_value - value
|
|
77
82
|
|
|
78
83
|
return gap
|
|
79
84
|
|
|
@@ -85,8 +90,11 @@ class Game(ABC):
|
|
|
85
90
|
average_opponent_strategies = (
|
|
86
91
|
average_strategies[:i] + average_strategies[i + 1:]
|
|
87
92
|
)
|
|
88
|
-
_,
|
|
89
|
-
|
|
93
|
+
_, best_response_value = self.best_response(
|
|
94
|
+
i,
|
|
95
|
+
*average_opponent_strategies,
|
|
96
|
+
)
|
|
97
|
+
gap += best_response_value - value
|
|
90
98
|
|
|
91
99
|
return gap
|
|
92
100
|
|
|
@@ -255,13 +263,11 @@ class NormalFormGame(Serializable, Game):
|
|
|
255
263
|
|
|
256
264
|
actions: Any
|
|
257
265
|
utilities: Any
|
|
258
|
-
indices: Any = field(init=False, default_factory=list)
|
|
259
266
|
|
|
260
267
|
def __post_init__(self):
|
|
261
268
|
super().__post_init__()
|
|
262
269
|
|
|
263
|
-
|
|
264
|
-
self.indices.append(dict(zip(actions, count())))
|
|
270
|
+
self.actions = tuple(map(OrderedSet, self.actions))
|
|
265
271
|
|
|
266
272
|
def _verify(self, *, utilities_shape=None, **kwargs):
|
|
267
273
|
super()._verify(**kwargs)
|
|
@@ -304,14 +310,6 @@ class TwoPlayerNormalFormGame(TwoPlayerGame, NormalFormGame):
|
|
|
304
310
|
def column_actions(self):
|
|
305
311
|
return self.actions[1]
|
|
306
312
|
|
|
307
|
-
@property
|
|
308
|
-
def row_indices(self):
|
|
309
|
-
return self.indices[0]
|
|
310
|
-
|
|
311
|
-
@property
|
|
312
|
-
def column_indices(self):
|
|
313
|
-
return self.indices[1]
|
|
314
|
-
|
|
315
313
|
@property
|
|
316
314
|
def row_utilities(self):
|
|
317
315
|
return self.utilities[:, :, 0]
|
|
@@ -414,7 +412,7 @@ class TwoPlayerExtensiveFormGame(TwoPlayerGame, ExtensiveFormGame):
|
|
|
414
412
|
for tfsdp, sequence in zip(tfsdps, raw_utility['sequences']):
|
|
415
413
|
sequence = tuple(sequence)
|
|
416
414
|
|
|
417
|
-
indices.append(tfsdp.
|
|
415
|
+
indices.append(tfsdp.sequences.index(sequence))
|
|
418
416
|
|
|
419
417
|
indices = tuple(indices)
|
|
420
418
|
row_utilities[indices] = raw_utility['values'][0]
|
|
@@ -448,14 +446,6 @@ class TwoPlayerExtensiveFormGame(TwoPlayerGame, ExtensiveFormGame):
|
|
|
448
446
|
def column_sequences(self):
|
|
449
447
|
return self.column_tree_form_sequential_decision_process.sequences
|
|
450
448
|
|
|
451
|
-
@property
|
|
452
|
-
def row_indices(self):
|
|
453
|
-
return self.row_tree_form_sequential_decision_process.indices
|
|
454
|
-
|
|
455
|
-
@property
|
|
456
|
-
def column_indices(self):
|
|
457
|
-
return self.column_tree_form_sequential_decision_process.indices
|
|
458
|
-
|
|
459
449
|
@property
|
|
460
450
|
def row_utilities(self):
|
|
461
451
|
return self.utilities[0]
|
|
@@ -530,7 +520,7 @@ class TwoPlayerZeroSumExtensiveFormGame(
|
|
|
530
520
|
for tfsdp, sequence in zip(tfsdps, raw_utility['sequences']):
|
|
531
521
|
sequence = tuple(sequence)
|
|
532
522
|
|
|
533
|
-
indices.append(tfsdp.
|
|
523
|
+
indices.append(tfsdp.sequences.index(sequence))
|
|
534
524
|
|
|
535
525
|
indices = tuple(indices)
|
|
536
526
|
utilities[indices] = raw_utility['value']
|
|
@@ -630,3 +620,169 @@ class SymmetrizedGame(Game):
|
|
|
630
620
|
|
|
631
621
|
def best_response(self, player, *opponent_strategies):
|
|
632
622
|
raise NotImplementedError
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
class ExtensiveFormGame2(ABC):
|
|
626
|
+
"""Extensive-form game (EFG)."""
|
|
627
|
+
|
|
628
|
+
@dataclass(frozen=True)
|
|
629
|
+
class State:
|
|
630
|
+
"""State of an extensive-form game."""
|
|
631
|
+
|
|
632
|
+
@property
|
|
633
|
+
@abstractmethod
|
|
634
|
+
def utilities(self):
|
|
635
|
+
pass
|
|
636
|
+
|
|
637
|
+
@property
|
|
638
|
+
@abstractmethod
|
|
639
|
+
def chance_action_probabilities(self):
|
|
640
|
+
pass
|
|
641
|
+
|
|
642
|
+
@property
|
|
643
|
+
@abstractmethod
|
|
644
|
+
def actions(self):
|
|
645
|
+
pass
|
|
646
|
+
|
|
647
|
+
@property
|
|
648
|
+
@abstractmethod
|
|
649
|
+
def infoset(self):
|
|
650
|
+
pass
|
|
651
|
+
|
|
652
|
+
@property
|
|
653
|
+
@abstractmethod
|
|
654
|
+
def player(self):
|
|
655
|
+
pass
|
|
656
|
+
|
|
657
|
+
@abstractmethod
|
|
658
|
+
def is_terminal(self):
|
|
659
|
+
pass
|
|
660
|
+
|
|
661
|
+
@abstractmethod
|
|
662
|
+
def is_chance(self):
|
|
663
|
+
pass
|
|
664
|
+
|
|
665
|
+
@abstractmethod
|
|
666
|
+
def utility(self, player):
|
|
667
|
+
pass
|
|
668
|
+
|
|
669
|
+
@abstractmethod
|
|
670
|
+
def apply(self, action):
|
|
671
|
+
pass
|
|
672
|
+
|
|
673
|
+
@property
|
|
674
|
+
@abstractmethod
|
|
675
|
+
def players(self):
|
|
676
|
+
pass
|
|
677
|
+
|
|
678
|
+
@property
|
|
679
|
+
@abstractmethod
|
|
680
|
+
def initial_state(self):
|
|
681
|
+
pass
|
|
682
|
+
|
|
683
|
+
def values(self, strategy_profile, state=None):
|
|
684
|
+
if state is None:
|
|
685
|
+
values = self.values(strategy_profile, self.initial_state)
|
|
686
|
+
elif state.is_terminal():
|
|
687
|
+
values = state.utilities
|
|
688
|
+
else:
|
|
689
|
+
if state.is_chance():
|
|
690
|
+
actions, probabilities = zip(
|
|
691
|
+
*state.chance_action_probabilities,
|
|
692
|
+
)
|
|
693
|
+
else:
|
|
694
|
+
actions = state.actions
|
|
695
|
+
probabilities = strategy_profile(state)
|
|
696
|
+
|
|
697
|
+
values = 0
|
|
698
|
+
|
|
699
|
+
for action, probability in zip(actions, probabilities):
|
|
700
|
+
values += (
|
|
701
|
+
probability
|
|
702
|
+
* self.values(strategy_profile, state.apply(action))
|
|
703
|
+
)
|
|
704
|
+
|
|
705
|
+
return values
|
|
706
|
+
|
|
707
|
+
def best_response_value(self, player, strategy_profile):
|
|
708
|
+
states = defaultdict(list)
|
|
709
|
+
counterfactual_reach_probabilities = {}
|
|
710
|
+
|
|
711
|
+
def dfs(state, counterfactual_reach_probability):
|
|
712
|
+
counterfactual_reach_probabilities[state] = (
|
|
713
|
+
counterfactual_reach_probability
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
if state.is_terminal():
|
|
717
|
+
return
|
|
718
|
+
|
|
719
|
+
if not state.is_chance():
|
|
720
|
+
states[state.infoset].append(state)
|
|
721
|
+
|
|
722
|
+
if state.is_chance() or state.player != player:
|
|
723
|
+
if state.is_chance():
|
|
724
|
+
actions, probabilities = zip(
|
|
725
|
+
*state.chance_action_probabilities,
|
|
726
|
+
)
|
|
727
|
+
else:
|
|
728
|
+
actions = state.actions
|
|
729
|
+
probabilities = strategy_profile(state)
|
|
730
|
+
|
|
731
|
+
for action, probability in zip(actions, probabilities):
|
|
732
|
+
dfs(
|
|
733
|
+
state.apply(action),
|
|
734
|
+
probability * counterfactual_reach_probability,
|
|
735
|
+
)
|
|
736
|
+
else:
|
|
737
|
+
for action in state.actions:
|
|
738
|
+
dfs(state.apply(action), counterfactual_reach_probability)
|
|
739
|
+
|
|
740
|
+
dfs(self.initial_state, 1)
|
|
741
|
+
|
|
742
|
+
@cache
|
|
743
|
+
def solve(state):
|
|
744
|
+
if state.is_terminal():
|
|
745
|
+
value = state.utility(player)
|
|
746
|
+
elif state.is_chance() or state.player != player:
|
|
747
|
+
if state.is_chance():
|
|
748
|
+
actions, probabilities = zip(
|
|
749
|
+
*state.chance_action_probabilities,
|
|
750
|
+
)
|
|
751
|
+
else:
|
|
752
|
+
actions = state.actions
|
|
753
|
+
probabilities = strategy_profile(state)
|
|
754
|
+
|
|
755
|
+
value = 0
|
|
756
|
+
|
|
757
|
+
for action, probability in zip(actions, probabilities):
|
|
758
|
+
value += probability * solve(state.apply(action))
|
|
759
|
+
else:
|
|
760
|
+
value = solve2(state.infoset)
|
|
761
|
+
|
|
762
|
+
return value
|
|
763
|
+
|
|
764
|
+
@cache
|
|
765
|
+
def solve2(infoset):
|
|
766
|
+
values = defaultdict(int)
|
|
767
|
+
|
|
768
|
+
for state in states[infoset]:
|
|
769
|
+
weight = counterfactual_reach_probabilities[state]
|
|
770
|
+
|
|
771
|
+
for i, action in enumerate(state.actions):
|
|
772
|
+
values[i] += weight * solve(state.apply(action))
|
|
773
|
+
|
|
774
|
+
return max(values.values())
|
|
775
|
+
|
|
776
|
+
return solve(self.initial_state)
|
|
777
|
+
|
|
778
|
+
def nash_gap(self, strategy_profile):
|
|
779
|
+
gap = 0
|
|
780
|
+
|
|
781
|
+
for player, value in zip(self.players, self.values(strategy_profile)):
|
|
782
|
+
best_response_value = self.best_response_value(
|
|
783
|
+
player,
|
|
784
|
+
strategy_profile,
|
|
785
|
+
)
|
|
786
|
+
gap += best_response_value - value
|
|
787
|
+
|
|
788
|
+
return gap
|
|
@@ -10,6 +10,7 @@ import numpy as np
|
|
|
10
10
|
|
|
11
11
|
from noregret.utilities import (
|
|
12
12
|
euclidean_projection_on_probability_simplex,
|
|
13
|
+
sample,
|
|
13
14
|
split,
|
|
14
15
|
stationary_distribution,
|
|
15
16
|
)
|
|
@@ -52,6 +53,9 @@ class RegretMinimizer(ABC):
|
|
|
52
53
|
def next_strategy(self, prediction=False):
|
|
53
54
|
pass
|
|
54
55
|
|
|
56
|
+
def undo_next_strategy(self):
|
|
57
|
+
self.strategies.pop()
|
|
58
|
+
|
|
55
59
|
def observe_utility(self, utility):
|
|
56
60
|
if len(self.strategies) == len(self.utilities):
|
|
57
61
|
raise ValueError('next strategy not yet outputted')
|
|
@@ -384,12 +388,15 @@ class BlumMansour(ProbabilitySimplexSwapRegretMinimizer):
|
|
|
384
388
|
self.previous_strategy[a] * prediction,
|
|
385
389
|
)
|
|
386
390
|
|
|
387
|
-
strategy = stationary_distribution(self.outputs
|
|
391
|
+
strategy = stationary_distribution(self.outputs)
|
|
388
392
|
|
|
389
393
|
self.strategies.append(strategy)
|
|
390
394
|
|
|
391
395
|
return strategy
|
|
392
396
|
|
|
397
|
+
def undo_next_strategy(self):
|
|
398
|
+
raise NotImplementedError
|
|
399
|
+
|
|
393
400
|
def observe_utility(self, utility):
|
|
394
401
|
super().observe_utility(utility)
|
|
395
402
|
|
|
@@ -471,6 +478,9 @@ class CounterfactualRegretMinimization(SequenceFormPolytopeRegretMinimizer):
|
|
|
471
478
|
|
|
472
479
|
return strategy
|
|
473
480
|
|
|
481
|
+
def undo_next_strategy(self):
|
|
482
|
+
raise NotImplementedError
|
|
483
|
+
|
|
474
484
|
def observe_utility(self, utility):
|
|
475
485
|
super().observe_utility(utility)
|
|
476
486
|
|
|
@@ -568,6 +578,9 @@ class CartesianProductRegretCircuit(RegretCircuit):
|
|
|
568
578
|
|
|
569
579
|
return strategy
|
|
570
580
|
|
|
581
|
+
def undo_next_strategy(self):
|
|
582
|
+
raise NotImplementedError
|
|
583
|
+
|
|
571
584
|
def observe_utility(self, utility):
|
|
572
585
|
super().observe_utility(utility)
|
|
573
586
|
|
|
@@ -634,6 +647,9 @@ class ConvexHullRegretCircuit(RegretCircuit):
|
|
|
634
647
|
|
|
635
648
|
return strategy
|
|
636
649
|
|
|
650
|
+
def undo_next_strategy(self):
|
|
651
|
+
raise NotImplementedError
|
|
652
|
+
|
|
637
653
|
def observe_utility(self, utility):
|
|
638
654
|
super().observe_utility(utility)
|
|
639
655
|
|
|
@@ -643,3 +659,152 @@ class ConvexHullRegretCircuit(RegretCircuit):
|
|
|
643
659
|
self.previous_outputs = self.outputs.copy()
|
|
644
660
|
|
|
645
661
|
self.mixing_regret_minimizer.observe_utility(self.outputs @ utility)
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
@dataclass
|
|
665
|
+
class StochasticRegretMinimization(ABC):
|
|
666
|
+
"""Stochastic regret minimization."""
|
|
667
|
+
|
|
668
|
+
extensive_form_game: Any
|
|
669
|
+
|
|
670
|
+
@property
|
|
671
|
+
def average_strategy_profile(self):
|
|
672
|
+
return lambda state: (
|
|
673
|
+
self._local_regret_minimizer(state).average_strategy
|
|
674
|
+
)
|
|
675
|
+
|
|
676
|
+
@abstractmethod
|
|
677
|
+
def _local_regret_minimizer(self, state):
|
|
678
|
+
pass
|
|
679
|
+
|
|
680
|
+
def external_sampling(self):
|
|
681
|
+
for player in self.extensive_form_game.players:
|
|
682
|
+
self._external_sampling(
|
|
683
|
+
player,
|
|
684
|
+
self.extensive_form_game.initial_state,
|
|
685
|
+
)
|
|
686
|
+
|
|
687
|
+
def _external_sampling(self, player, state):
|
|
688
|
+
if state.is_terminal():
|
|
689
|
+
utility = state.utility(player)
|
|
690
|
+
elif state.is_chance():
|
|
691
|
+
actions, probabilities = zip(*state.chance_action_probabilities)
|
|
692
|
+
action = sample(actions, probabilities)
|
|
693
|
+
utility = self._external_sampling(player, state.apply(action))
|
|
694
|
+
else:
|
|
695
|
+
local_regret_minimizer = self._local_regret_minimizer(state)
|
|
696
|
+
actions = state.actions
|
|
697
|
+
probabilities = local_regret_minimizer.next_strategy()
|
|
698
|
+
|
|
699
|
+
if state.player == player:
|
|
700
|
+
utilities = list(
|
|
701
|
+
map(
|
|
702
|
+
partial(self._external_sampling, player),
|
|
703
|
+
map(state.apply, actions),
|
|
704
|
+
),
|
|
705
|
+
)
|
|
706
|
+
utility = utilities @ probabilities
|
|
707
|
+
|
|
708
|
+
local_regret_minimizer.observe_utility(utilities)
|
|
709
|
+
else:
|
|
710
|
+
action = sample(actions, probabilities)
|
|
711
|
+
utility = self._external_sampling(player, state.apply(action))
|
|
712
|
+
|
|
713
|
+
local_regret_minimizer.undo_next_strategy()
|
|
714
|
+
|
|
715
|
+
return utility
|
|
716
|
+
|
|
717
|
+
def outcome_sampling(self, reference_strategy_profile):
|
|
718
|
+
for player in self.extensive_form_game.players:
|
|
719
|
+
self._outcome_sampling(
|
|
720
|
+
reference_strategy_profile,
|
|
721
|
+
player,
|
|
722
|
+
self.extensive_form_game.initial_state,
|
|
723
|
+
1,
|
|
724
|
+
)
|
|
725
|
+
|
|
726
|
+
def _outcome_sampling(
|
|
727
|
+
self,
|
|
728
|
+
reference_strategy_profile,
|
|
729
|
+
player,
|
|
730
|
+
state,
|
|
731
|
+
reference_reach_probability,
|
|
732
|
+
):
|
|
733
|
+
if state.is_terminal():
|
|
734
|
+
utility = state.utility(player) / reference_reach_probability
|
|
735
|
+
elif state.is_chance():
|
|
736
|
+
actions, probabilities = zip(*state.chance_action_probabilities)
|
|
737
|
+
action = sample(actions, probabilities)
|
|
738
|
+
utility = self._outcome_sampling(
|
|
739
|
+
reference_strategy_profile,
|
|
740
|
+
player,
|
|
741
|
+
state.apply(action),
|
|
742
|
+
reference_reach_probability,
|
|
743
|
+
)
|
|
744
|
+
else:
|
|
745
|
+
local_regret_minimizer = self._local_regret_minimizer(state)
|
|
746
|
+
actions = state.actions
|
|
747
|
+
|
|
748
|
+
if state.player == player:
|
|
749
|
+
probabilities = reference_strategy_profile(state)
|
|
750
|
+
index = sample(range(len(actions)), probabilities)
|
|
751
|
+
action = actions[index]
|
|
752
|
+
probability = probabilities[index]
|
|
753
|
+
utility = (
|
|
754
|
+
probability
|
|
755
|
+
* self._outcome_sampling(
|
|
756
|
+
reference_strategy_profile,
|
|
757
|
+
player,
|
|
758
|
+
state.apply(action),
|
|
759
|
+
probability * reference_reach_probability,
|
|
760
|
+
)
|
|
761
|
+
)
|
|
762
|
+
utilities = np.zeros(len(actions))
|
|
763
|
+
utilities[index] = utility
|
|
764
|
+
|
|
765
|
+
local_regret_minimizer.next_strategy()
|
|
766
|
+
local_regret_minimizer.observe_utility(utilities)
|
|
767
|
+
else:
|
|
768
|
+
probabilities = local_regret_minimizer.next_strategy()
|
|
769
|
+
action = sample(actions, probabilities)
|
|
770
|
+
utility = self._outcome_sampling(
|
|
771
|
+
reference_strategy_profile,
|
|
772
|
+
player,
|
|
773
|
+
state.apply(action),
|
|
774
|
+
reference_reach_probability,
|
|
775
|
+
)
|
|
776
|
+
|
|
777
|
+
local_regret_minimizer.undo_next_strategy()
|
|
778
|
+
|
|
779
|
+
return utility
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
@dataclass
|
|
783
|
+
class MonteCarloCounterfactualRegretMinimization(StochasticRegretMinimization):
|
|
784
|
+
"""Monte Carlo Counterfactual regret minimization (MCCFR)."""
|
|
785
|
+
|
|
786
|
+
regret_minimizer_factory: Any = partial(
|
|
787
|
+
RegretMatching,
|
|
788
|
+
is_time_symmetric=True,
|
|
789
|
+
)
|
|
790
|
+
_: KW_ONLY
|
|
791
|
+
local_regret_minimizers: Any = field(init=False, default_factory=dict)
|
|
792
|
+
|
|
793
|
+
@property
|
|
794
|
+
def iteration_count(self):
|
|
795
|
+
iteration_count = 0
|
|
796
|
+
|
|
797
|
+
for R in self.local_regret_minimizers.values():
|
|
798
|
+
iteration_count += R.iteration_count
|
|
799
|
+
|
|
800
|
+
return iteration_count
|
|
801
|
+
|
|
802
|
+
def _local_regret_minimizer(self, state):
|
|
803
|
+
if state.infoset in self.local_regret_minimizers:
|
|
804
|
+
R = self.local_regret_minimizers[state.infoset]
|
|
805
|
+
else:
|
|
806
|
+
action_count = len(state.actions)
|
|
807
|
+
R = self.regret_minimizer_factory(action_count)
|
|
808
|
+
self.local_regret_minimizers[state.infoset] = R
|
|
809
|
+
|
|
810
|
+
return R
|
|
@@ -6,6 +6,7 @@ from functools import partial
|
|
|
6
6
|
from importlib import import_module
|
|
7
7
|
from json import dump, dumps, load, loads
|
|
8
8
|
from math import inf
|
|
9
|
+
from random import choices
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
11
12
|
from ordered_set import OrderedSet
|
|
@@ -45,11 +46,7 @@ def euclidean_projection_on_probability_simplex(input_):
|
|
|
45
46
|
|
|
46
47
|
def stationary_distribution(stochastic_matrix):
|
|
47
48
|
P = stochastic_matrix
|
|
48
|
-
|
|
49
|
-
if not np.allclose(P.sum(1), 1):
|
|
50
|
-
raise ValueError('matrix not stochastic')
|
|
51
|
-
|
|
52
|
-
eigenvalues, eigenvectors = LA.eig(P.T)
|
|
49
|
+
eigenvalues, eigenvectors = LA.eig(P)
|
|
53
50
|
pi = eigenvectors[:, np.isclose(eigenvalues, 1)][:, 0]
|
|
54
51
|
pi /= pi.sum()
|
|
55
52
|
pi = pi.real
|
|
@@ -96,6 +93,10 @@ def split(values, counts):
|
|
|
96
93
|
return splits
|
|
97
94
|
|
|
98
95
|
|
|
96
|
+
def sample(values, probabilities):
|
|
97
|
+
return choices(values, probabilities)[0]
|
|
98
|
+
|
|
99
|
+
|
|
99
100
|
class Serializable(ABC):
|
|
100
101
|
@classmethod
|
|
101
102
|
@abstractmethod
|
|
@@ -153,7 +154,6 @@ class TreeFormSequentialDecisionProcess(Serializable):
|
|
|
153
154
|
decision_points: Any = field(init=False, default_factory=OrderedSet)
|
|
154
155
|
observation_points: Any = field(init=False, default_factory=OrderedSet)
|
|
155
156
|
sequences: Any = field(init=False, default_factory=OrderedSet)
|
|
156
|
-
indices: Any = field(init=False, default_factory=dict)
|
|
157
157
|
parent_sequences: Any = field(init=False, default_factory=dict)
|
|
158
158
|
actions: Any = field(
|
|
159
159
|
init=False,
|
|
@@ -203,8 +203,6 @@ class TreeFormSequentialDecisionProcess(Serializable):
|
|
|
203
203
|
if is_sequence:
|
|
204
204
|
self.sequences.add(parent_edge)
|
|
205
205
|
|
|
206
|
-
self.indices[parent_edge] = len(self.indices)
|
|
207
|
-
|
|
208
206
|
self.parent_sequences[p] = parent_sequence
|
|
209
207
|
|
|
210
208
|
def behavioral_uniform_strategy(self):
|
|
@@ -227,7 +225,7 @@ class TreeFormSequentialDecisionProcess(Serializable):
|
|
|
227
225
|
|
|
228
226
|
for i, a in enumerate(self.actions[p]):
|
|
229
227
|
value = (
|
|
230
|
-
utility[self.
|
|
228
|
+
utility[self.sequences.index((p, a))]
|
|
231
229
|
+ V[self.transitions[p, a]]
|
|
232
230
|
)
|
|
233
231
|
|
|
@@ -250,15 +248,15 @@ class TreeFormSequentialDecisionProcess(Serializable):
|
|
|
250
248
|
|
|
251
249
|
def behavioral_to_sequence_form(self, behavioral_strategy):
|
|
252
250
|
strategy = np.zeros(len(self.sequences))
|
|
253
|
-
strategy[self.
|
|
251
|
+
strategy[self.sequences.index(())] = 1
|
|
254
252
|
|
|
255
253
|
for j in self.decision_points:
|
|
256
254
|
p_j = self.parent_sequences[j]
|
|
257
255
|
|
|
258
256
|
for i, a in enumerate(self.actions[j]):
|
|
259
|
-
strategy[self.
|
|
257
|
+
strategy[self.sequences.index((j, a))] = (
|
|
260
258
|
behavioral_strategy[j][i]
|
|
261
|
-
* strategy[self.
|
|
259
|
+
* strategy[self.sequences.index(p_j)]
|
|
262
260
|
)
|
|
263
261
|
|
|
264
262
|
return strategy
|
|
@@ -273,7 +271,7 @@ class TreeFormSequentialDecisionProcess(Serializable):
|
|
|
273
271
|
V[p] += (
|
|
274
272
|
behavioral_strategy[p][i]
|
|
275
273
|
* (
|
|
276
|
-
utility[self.
|
|
274
|
+
utility[self.sequences.index((p, a))]
|
|
277
275
|
+ V[self.transitions[p, a]]
|
|
278
276
|
)
|
|
279
277
|
)
|
|
@@ -288,7 +286,7 @@ class TreeFormSequentialDecisionProcess(Serializable):
|
|
|
288
286
|
|
|
289
287
|
for i, a in enumerate(self.actions[j]):
|
|
290
288
|
utilities[j][i] = (
|
|
291
|
-
utility[self.
|
|
289
|
+
utility[self.sequences.index((j, a))]
|
|
292
290
|
+ V[self.transitions[j, a]]
|
|
293
291
|
)
|
|
294
292
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|