PyPI - noregret - Versions diffs - 0.0.0.dev8__tar.gz → 0.0.0.dev9__tar.gz - Mend

noregret 0.0.0.dev8.tar.gz → 0.0.0.dev9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: noregret
-Version: 0.0.0.dev8
+Version: 0.0.0.dev9
 Summary: No-regret learning dynamics
 Home-page: https://github.com/uoftcprg/noregret
 Author: Universal, Open, Free, and Transparent Computer Poker Research Group

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/__init__.py RENAMED Viewed

@@ -18,6 +18,7 @@ from noregret.games import (
     RockPaperScissorsPlus,
     RockPaperSuperscissors,
     StagHunt,
+    StrategyProfile,
     to_extensive_form_game,
     TwoPlayerExtensiveFormGame,
     TwoPlayerGame,
@@ -27,6 +28,7 @@ from noregret.games import (
     TwoPlayerZeroSumGame,
     TwoPlayerZeroSumMultilinearGame,
     TwoPlayerZeroSumNormalFormGame,
+    UniformStrategyProfile,
 )
 from noregret.kernels import (
     CUDAKernel,
@@ -46,6 +48,7 @@ from noregret.regret_minimizers import (
     EuclideanRegularization,
     FollowTheRegularizedLeader,
     MirrorDescent,
+    MonteCarloCounterfactualRegretMinimization,
     MultiplicativeWeightsUpdate,
     OnlineGradientDescent,
     ProbabilitySimplexRegretMinimizer,
@@ -54,15 +57,17 @@ from noregret.regret_minimizers import (
     RegretMatchingPlus,
     RegretMinimizer,
     SequenceFormPolytopeRegretMinimizer,
+    StochasticRegretMinimizer,
     SwapRegretMinimizer,
 )
 from noregret.sequence_form_polytopes import SequenceFormPolytope
 from noregret.solvers import (
     linear_programming,
     regret_minimization,
+    stochastic_regret_minimization,
     symmetric_regret_minimization,
 )
-from noregret.utilities import import_object, tuple_or_none
+from noregret.utilities import import_object, sample, tuple_or_none
 BM = BlumMansour
 """Alias for :class:`noregret.BlumMansour`."""
@@ -94,6 +99,10 @@ FTRL = FollowTheRegularizedLeader
 """Alias for :class:`noregret.FollowTheRegularizedLeader`."""
 lp = linear_programming
 """Alias for :func:`noregret.linear_programming`."""
+MCCFR = MonteCarloCounterfactualRegretMinimization
+"""Alias for
+:class:`noregret.MonteCarloCounterfactualRegretMinimization`.
+"""
 MD = MirrorDescent
 """Alias for :class:`noregret.MirrorDescent`."""
 MWU = MultiplicativeWeightsUpdate
@@ -112,6 +121,8 @@ RM = RegretMatching
 """Alias for :class:`noregret.RegretMatching`."""
 rm = regret_minimization
 """Alias for :func:`noregret.regret_minimization`."""
+stochastic_rm = stochastic_regret_minimization
+"""Alias for :func:`noregret.stochastic_regret_minimization`."""
 symmetric_rm = symmetric_regret_minimization
 """Alias for :func:`noregret.symmetric_regret_minimization`."""
 to_efg = to_extensive_form_game
@@ -155,8 +166,10 @@ __all__ = (
     'lp',
     'MatchingPennies',
     'matrix_game',
+    'MCCFR',
     'MD',
     'MirrorDescent',
+    'MonteCarloCounterfactualRegretMinimization',
     'MultilinearGame',
     'MultiplicativeWeightsUpdate',
     'MWU',
@@ -181,10 +194,15 @@ __all__ = (
     'RockPaperScissors',
     'RockPaperScissorsPlus',
     'RockPaperSuperscissors',
+    'sample',
     'SequenceFormPolytope',
     'SequenceFormPolytopeRegretMinimizer',
     'Serializable',
     'StagHunt',
+    'stochastic_regret_minimization',
+    'StochasticRegretMinimizer',
+    'stochastic_rm',
+    'StrategyProfile',
     'SwapRegretMinimizer',
     'symmetric_regret_minimization',
     'symmetric_rm',
@@ -199,4 +217,5 @@ __all__ = (
     'TwoPlayerZeroSumGame',
     'TwoPlayerZeroSumMultilinearGame',
     'TwoPlayerZeroSumNormalFormGame',
+    'UniformStrategyProfile',
 )

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/__init__.py RENAMED Viewed

@@ -1,5 +1,10 @@
 """Module for games."""
-from noregret.games.black_box import BlackBoxGame, open_spiel_game
+from noregret.games.black_box import (
+    BlackBoxGame,
+    open_spiel_game,
+    StrategyProfile,
+    UniformStrategyProfile,
+)
 from noregret.games.extensive_form import (
     ExtensiveFormGame,
     to_extensive_form_game,
@@ -49,6 +54,7 @@ __all__ = (
     'RockPaperScissorsPlus',
     'RockPaperSuperscissors',
     'StagHunt',
+    'StrategyProfile',
     'to_extensive_form_game',
     'TwoPlayerExtensiveFormGame',
     'TwoPlayerGame',
@@ -58,4 +64,5 @@ __all__ = (
     'TwoPlayerZeroSumGame',
     'TwoPlayerZeroSumMultilinearGame',
     'TwoPlayerZeroSumNormalFormGame',
+    'UniformStrategyProfile',
 )

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/black_box.py RENAMED Viewed

@@ -4,7 +4,7 @@ from dataclasses import dataclass, field
 from functools import partial
 from ordered_set import OrderedSet
-from pyspiel import GameType, load_game
+from pyspiel import exploitability, GameType, load_game
 from noregret.kernels import Kernel
@@ -139,6 +139,12 @@ class BlackBoxGame(ABC):
         return np.array(ps, dtype)
+    def exploitability(self, strategy_profile):
+        if not self.is_two_player or not self.is_zero_sum:
+            raise ValueError('not 2p0s')
+        raise NotImplementedError
 @dataclass
 class _OpenSpielBlackBoxGame(BlackBoxGame):
@@ -167,17 +173,14 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
         return node.child(node.string_to_action(action))
     def children(self, node):
-        return list(node.child(a) for a in node.legal_actions())
+        return list(map(node.child, node.legal_actions()))
     def actions_and_children(self, node):
-        actions = []
-        children = []
-        for a in node.legal_actions():
-            actions.append(node.action_to_string(a))
-            children.append(node.child(a))
+        A = node.legal_actions()
+        actions = OrderedSet(map(node.action_to_string, A))
+        children = list(map(node.child, A))
-        return OrderedSet(actions), children
+        return actions, children
     def player(self, node):
         i = node.current_player()
@@ -212,6 +215,27 @@ class _OpenSpielBlackBoxGame(BlackBoxGame):
         return np.array([p for _, p in node.chance_outcomes()], dtype)
+    def _sigma(self, strategy_profile, h, sigma):
+        A = h.legal_actions()
+        h_primes = list(map(h.child, A))
+        i = self.player(h)
+        if A and i is not None and (j := self.information_set(h)) not in sigma:
+            sigma[j] = list(zip(A, strategy_profile(h).tolist()))
+        for h_prime in h_primes:
+            self._sigma(strategy_profile, h_prime, sigma)
+    def _sigma2(self, strategy_profile):
+        sigma = {}
+        self._sigma(strategy_profile, self.root_node, sigma)
+        return sigma
+    def exploitability(self, strategy_profile):
+        return exploitability(self._game, self._sigma2(strategy_profile))
 def open_spiel_game(kernel, game):
     """Load a game from OpenSpiel.
@@ -221,3 +245,28 @@ def open_spiel_game(kernel, game):
     :return: Game.
     """
     return _OpenSpielBlackBoxGame(kernel, game)
+@dataclass
+class StrategyProfile(ABC):
+    """Abstract base class for strategy profiles."""
+    kernel: Kernel
+    """Kernel."""
+    game: BlackBoxGame
+    """Game."""
+    @abstractmethod
+    def __call__(self, node):
+        pass
+@dataclass
+class UniformStrategyProfile(StrategyProfile):
+    """Class for uniform strategy profiles."""
+    def __call__(self, node):
+        np = self.kernel.numpy
+        dtype = self.kernel.data_type
+        n = len(self.game.actions(node))
+        return np.full(n, 1 / n, dtype)

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/games/extensive_form.py RENAMED Viewed

@@ -178,9 +178,9 @@ def _nfg2efg(ker, game, decision_points='p{}'.format):
     payoffs = scipy.sparse.csr_array(payoffs)
     sfps = []
-    for i, A_j in enumerate(game.actions):
+    for i, A in enumerate(game.actions):
         j = decision_points(i)
-        sfp = SequenceFormPolytope(ker, {j: A_j}, {j: None})
+        sfp = SequenceFormPolytope(ker, {j: A}, {j: None})
         sfps.append(sfp)
@@ -198,11 +198,11 @@ def _bbg2efg(ker, game):
     raw_payoffs = defaultdict(int)
     def dfs(h, p, seqs, us):
-        A_j, h_primes = game.actions_and_children(h)
+        A, h_primes = game.actions_and_children(h)
         i = game.player(h)
         us = us + game.utilities(h)
-        if not A_j:
+        if not A:
             raw_payoffs[tuple(seqs)] += p * us
         elif i is None:
             p_primes = game.chance_probabilities(h)
@@ -214,7 +214,7 @@ def _bbg2efg(ker, game):
             p_j = seqs[i]
             p_js[i][j] = p_j
-            for a, h_prime in zip(A_j, h_primes):
+            for a, h_prime in zip(A, h_primes):
                 next_seqs = seqs.copy()
                 next_seqs[i] = j, a

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/regret_minimizers/__init__.py RENAMED Viewed

@@ -24,6 +24,10 @@ from noregret.regret_minimizers.sequence_form_polytopes import (
     DiscountedCounterfactualRegretMinimization,
     SequenceFormPolytopeRegretMinimizer,
 )
+from noregret.regret_minimizers.stochastic import (
+    MonteCarloCounterfactualRegretMinimization,
+    StochasticRegretMinimizer,
+)
 __all__ = (
     'BlumMansour',
@@ -36,6 +40,7 @@ __all__ = (
     'EuclideanRegularization',
     'FollowTheRegularizedLeader',
     'MirrorDescent',
+    'MonteCarloCounterfactualRegretMinimization',
     'MultiplicativeWeightsUpdate',
     'OnlineGradientDescent',
     'ProbabilitySimplexRegretMinimizer',
@@ -44,5 +49,6 @@ __all__ = (
     'RegretMatchingPlus',
     'RegretMinimizer',
     'SequenceFormPolytopeRegretMinimizer',
+    'StochasticRegretMinimizer',
     'SwapRegretMinimizer',
 )

noregret-0.0.0.dev9/noregret/regret_minimizers/stochastic.py ADDED Viewed

@@ -0,0 +1,202 @@
+"""Module for stochastic regret minimizers."""
+from abc import ABC
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from typing import Any
+from noregret.games.black_box import BlackBoxGame
+from noregret.kernels import Kernel
+from noregret.regret_minimizers.probability_simplices import (
+    ProbabilitySimplexRegretMinimizer,
+    RegretMatching,
+)
+from noregret.utilities import sample
+@dataclass
+class StochasticRegretMinimizer(ABC):
+    """Abstract base class for stochastic regret minimizers."""
+    kernel: Kernel
+    """Kernel."""
+    game: BlackBoxGame
+    """Game."""
+    regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer]
+    """Regret minimizer type."""
+    reference_strategy_profile: Callable[[Any], Any] | None = None
+    """Reference strategy profile."""
+    sample_count: int = field(default=0, init=False)
+    """Number of samples."""
+    next_sample_count: int = field(default=0, init=False)
+    """Next number of samples."""
+    node_visit_count: int = field(default=0, init=False)
+    """Number of node visits."""
+    next_node_visit_count: int = field(default=0, init=False)
+    """Next number of node visits."""
+    regret_minimizers: dict[str, ProbabilitySimplexRegretMinimizer] = field(
+        default_factory=dict,
+        init=False,
+    )
+    def regret_minimizer(self, node):
+        """Return the regret minimizer given an information set.
+        :param node: Node.
+        :return: Regret minimizer.
+        """
+        j = self.game.information_set(node)
+        if j not in self.regret_minimizers:
+            self.regret_minimizers[j] = self.regret_minimizer_type(
+                self.kernel,
+                len(self.game.actions(node)),
+            )
+        return self.regret_minimizers[j]
+    def average_action_probabilities(self, node):
+        """Return the average action probabilities given a node.
+        :param node: Node.
+        :return: Average action probabilities.
+        """
+        np = self.kernel.numpy
+        dtype = self.kernel.data_type
+        R = self.regret_minimizer(node)
+        ps = R.average_strategy
+        if np.isscalar(ps):
+            ps = np.full(R.dimension, 1 / R.dimension, dtype)
+        return ps
+    def _action_probabilities(self, h):
+        R = self.regret_minimizer(h)
+        ps = R.next_strategy
+        if ps is None:
+            ps = R.output()
+        return ps
+    def _external_sampling(self, i, us, h):
+        np = self.kernel.numpy
+        dtype = self.kernel.data_type
+        self.next_node_visit_count += 1
+        u = self.game.utility(h, i)
+        A = self.game.actions(h)
+        if A:
+            i_prime = self.game.player(h)
+            if i_prime is None:
+                ps = self.game.chance_probabilities(h)
+            else:
+                ps = self._action_probabilities(h)
+            if i_prime == i:
+                u_primes = []
+                for a in A:
+                    h_prime = self.game.apply(h, a)
+                    u_primes.append(self._external_sampling(i, us, h_prime))
+                j = self.game.information_set(h)
+                us[j] = np.array(u_primes, dtype)
+                u += us[j] @ ps
+            else:
+                a = sample(A, ps)
+                h_prime = self.game.apply(h, a)
+                u += self._external_sampling(i, us, h_prime)
+        return u
+    def _external_sampling2(self, player):
+        us = {}
+        self._external_sampling(player, us, self.game.root_node)
+        return us
+    def _outcome_sampling(self, i, us, h, p):
+        np = self.kernel.numpy
+        dtype = self.kernel.data_type
+        self.next_node_visit_count += 1
+        u = self.game.utility(h, i) / p
+        A = self.game.actions(h)
+        if A:
+            i_prime = self.game.player(h)
+            if i_prime is None:
+                ps = self.game.chance_probabilities(h)
+            elif i_prime == i:
+                ps = self.reference_strategy_profile(h)
+            else:
+                ps = self._action_probabilities(h)
+            k = sample(range(len(A)), ps)
+            a = A[k]
+            h_prime = self.game.apply(h, a)
+            p_prime = ps[k] * p
+            u_prime = ps[k] * self._outcome_sampling(i, us, h_prime, p_prime)
+            u += u_prime
+            if i_prime == i:
+                self.regret_minimizer(h)
+                j = self.game.information_set(h)
+                us[j] = np.zeros(len(A), dtype)
+                us[j][k] = u_prime
+        return u
+    def _outcome_sampling2(self, player):
+        us = {}
+        self._outcome_sampling(player, us, self.game.root_node, 1)
+        return us
+    def sample(self, player):
+        """Sample.
+        :param player: Player.
+        :return: Utilities.
+        """
+        self.next_sample_count += 1
+        if self.reference_strategy_profile is None:
+            us = self._external_sampling2(player)
+        else:
+            us = self._outcome_sampling2(player)
+        return us
+    def observe(self, utilities):
+        """Observe utilities.
+        :param utilities: Utilities.
+        :return: ``None``.
+        """
+        self.sample_count = self.next_sample_count
+        self.node_visit_count = self.next_node_visit_count
+        for j, u in utilities.items():
+            R = self.regret_minimizers[j]
+            if R.next_strategy is None:
+                R.output()
+            R.observe(u)
+@dataclass
+class MonteCarloCounterfactualRegretMinimization(
+        StochasticRegretMinimizer,
+        ABC,
+):
+    """Class for Monte Carlo counterfactual regret minimization (MCCFR)."""
+    regret_minimizer_type: type[ProbabilitySimplexRegretMinimizer] = (
+        RegretMatching
+    )

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/solvers/__init__.py RENAMED Viewed

@@ -2,11 +2,13 @@
 from noregret.solvers.linear_programming import linear_programming
 from noregret.solvers.regret_minimization import (
     regret_minimization,
+    stochastic_regret_minimization,
     symmetric_regret_minimization,
 )
 __all__ = (
     'linear_programming',
     'regret_minimization',
+    'stochastic_regret_minimization',
     'symmetric_regret_minimization',
 )

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/solvers/regret_minimization.py RENAMED Viewed

@@ -1,6 +1,6 @@
 """Module for regret minimization."""
 from collections.abc import Iterable, Mapping
-from itertools import count
+from itertools import count, repeat
 from tqdm import tqdm
@@ -29,10 +29,6 @@ def regret_minimization(
     :param progress_bar: Whether to show a progress bar.
     :return: Average strategy profile.
     """
-    np = game.kernel.numpy
-    if len(regret_minimizers) != game.player_count:
-        raise ValueError('inconsistent number of regret minimizers')
     def average_strategy_profile():
         average_strategy_profile = []
@@ -45,6 +41,11 @@ def regret_minimization(
     def exploitability():
         return game.exploitability(*average_strategy_profile())
+    np = game.kernel.numpy
+    if len(regret_minimizers) != game.player_count:
+        raise ValueError('inconsistent number of regret minimizers')
     if iteration_count is None or np.isposinf(iteration_count):
         iterations = count()
     else:
@@ -57,24 +58,24 @@ def regret_minimization(
     elif isinstance(progress_bar, Iterable):
         iterations = tqdm(iterations, *progress_bar)
-    s = []
+    sigma = []
     for R in regret_minimizers:
-        s.append(R.output(prediction))
+        sigma.append(R.output(prediction))
     for t in iterations:
         if alternation:
             for i, R in enumerate(regret_minimizers):
-                R.observe(game.utility(i, *s[:i], *s[i + 1:]))
+                R.observe(game.utility(i, *sigma[:i], *sigma[i + 1:]))
-                s[i] = R.output(prediction)
+                sigma[i] = R.output(prediction)
         else:
-            U = game.utilities(*s)
+            us = game.utilities(*sigma)
-            for i, (R, u) in enumerate(zip(regret_minimizers, U)):
+            for i, (R, u) in enumerate(zip(regret_minimizers, us)):
                 R.observe(u)
-                s[i] = R.output(prediction)
+                sigma[i] = R.output(prediction)
         if not checkpoints or t in checkpoints:
             if update is not None:
@@ -116,19 +117,18 @@ def symmetric_regret_minimization(
     :param progress_bar: Whether to show a progress bar.
     :return: Average strategy profile.
     """
-    np = game.kernel.numpy
-    if not game.is_symmetric:
-        raise ValueError('game is asymmetric')
-    R = regret_minimizer
     def average_strategy_profile():
-        return [R.average_strategy] * game.player_count
+        return [regret_minimizer.average_strategy] * game.player_count
     def exploitability():
         return game.exploitability(*average_strategy_profile())
+    np = game.kernel.numpy
+    if not game.is_symmetric:
+        raise ValueError('game is asymmetric')
     if iteration_count is None or np.isposinf(iteration_count):
         iterations = count()
     else:
@@ -141,12 +141,14 @@ def symmetric_regret_minimization(
     elif isinstance(progress_bar, Iterable):
         iterations = tqdm(iterations, *progress_bar)
-    s_neg_1 = [R.output(prediction)] * (game.player_count - 1)
+    sigma_1 = regret_minimizer.output(prediction)
     for t in iterations:
-        R.observe(game.utility(0, *s_neg_1))
+        u = game.utility(0, *repeat(sigma_1, game.player_count - 1))
-        s_neg_1 = [R.output(prediction)] * (game.player_count - 1)
+        regret_minimizer.observe(u)
+        sigma_1 = regret_minimizer.output(prediction)
         if not checkpoints or t in checkpoints:
             if update is not None:
@@ -164,3 +166,62 @@ def symmetric_regret_minimization(
                 break
     return average_strategy_profile()
+def stochastic_regret_minimization(
+        game,
+        regret_minimizer,
+        alternation=False,
+        sample_count=1000000,
+        checkpoints=(),
+        update=None,
+        progress_bar=True,
+):
+    """Solve a game using stochastic regret minimization.
+    :param game: Game.
+    :param regret_minimizer: Regret minimizer.
+    :param alternation: Whether to alternate, defaults to ``False``.
+    :param sample_count: Number of samples, defaults to ``1000000``.
+    :param checkpoints: Checkpoints.
+    :param update: Update.
+    :param progress_bar: Whether to show a progress bar.
+    :return: Average action probabilities.
+    """
+    np = game.kernel.numpy
+    if sample_count is None or np.isposinf(sample_count):
+        samples = count()
+    else:
+        samples = range(sample_count)
+    if progress_bar is True:
+        samples = tqdm(samples)
+    elif isinstance(progress_bar, Mapping):
+        samples = tqdm(samples, **progress_bar)
+    elif isinstance(progress_bar, Iterable):
+        samples = tqdm(samples, *progress_bar)
+    for s in samples:
+        if alternation:
+            for i in range(game.player_count):
+                regret_minimizer.observe(regret_minimizer.sample(i))
+        else:
+            uss = []
+            for i in range(game.player_count):
+                uss.append(regret_minimizer.sample(i))
+            for us in uss:
+                regret_minimizer.observe(us)
+        if not checkpoints or s in checkpoints:
+            if update is not None:
+                status = update()
+            else:
+                status = False
+            if status:
+                break
+    return regret_minimizer.average_action_probabilities

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_games.py RENAMED Viewed

@@ -197,6 +197,19 @@ class BlackBoxGameTestCase(TestCase):
             np.testing.assert_equal(ps, ps2)
+    def test_exploitability(self):
+        for game in self.GAMES:
+            sigma = nr.UniformStrategyProfile(self.KER, game)
+            epsilon = game.exploitability(sigma)
+            game = nr.to_efg(self.KER, game)
+            sfps = game.sequence_form_polytopes
+            bs = [sfp.behavioral_form_uniform_strategy for sfp in sfps]
+            sigma = [sfp.to_sequence_form(b) for sfp, b in zip(sfps, bs)]
+            epsilon2 = game.exploitability(*sigma)
+            self.assertAlmostEqual(epsilon, epsilon2)
 if __name__ == '__main__':
     main()  # pragma: no cover

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_linear_programming.py RENAMED Viewed

@@ -23,12 +23,12 @@ class LinearProgrammingTestCase(TestCase):
         for game, value in self.GAME_VALUES:
             x, y = nr.lp(game)
-            e = game.exploitability(x, y)
+            epsilon = game.exploitability(x, y)
             v = game.expected_row_utility(x, y)
-            self.assertAlmostEqual(e, 0)
+            self.assertAlmostEqual(epsilon, 0)
             self.assertAlmostEqual(v, value)
-            self.assertEqual(e.dtype, dtype)
+            self.assertEqual(epsilon.dtype, dtype)
             self.assertEqual(v.dtype, dtype)

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/tests/test_regret_minimization.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from functools import partial
 from math import inf
+from random import seed
 from unittest import main, TestCase
 import noregret as nr
@@ -44,12 +45,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
                             target_exploitability=self.TARGET_EXPLOITABILITY,
                             progress_bar=False,
                         )
-                        e = game.exploitability(x_bar, y_bar)
+                        epsilon = game.exploitability(x_bar, y_bar)
                         v = game.expected_row_utility(x_bar, y_bar)
-                        self.assertLess(e, self.TARGET_EXPLOITABILITY)
+                        self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
                         self.assertAlmostEqual(v, value, delta=self.DELTA)
-                        self.assertEqual(e.dtype, dtype)
+                        self.assertEqual(epsilon.dtype, dtype)
                         self.assertEqual(v.dtype, dtype)
     def test_last_iterate_convergence(self):
@@ -70,12 +71,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
                         target_exploitability=self.TARGET_EXPLOITABILITY,
                         progress_bar=False,
                     )
-                    e = game.exploitability(x_bar, y_bar)
+                    epsilon = game.exploitability(x_bar, y_bar)
                     v = game.expected_row_utility(x_bar, y_bar)
-                    self.assertLess(e, self.TARGET_EXPLOITABILITY)
+                    self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
                     self.assertAlmostEqual(v, value, delta=self.DELTA)
-                    self.assertEqual(e.dtype, dtype)
+                    self.assertEqual(epsilon.dtype, dtype)
                     self.assertEqual(v.dtype, dtype)
     def test_frequent_iterate_convergence(self):
@@ -94,12 +95,12 @@ class ProbabilitySimplexRegretMinimizationTestCase(TestCase):
                     target_exploitability=self.TARGET_EXPLOITABILITY,
                     progress_bar=False,
                 )
-                e = game.exploitability(x_bar, y_bar)
+                epsilon = game.exploitability(x_bar, y_bar)
                 v = game.expected_row_utility(x_bar, y_bar)
-                self.assertLess(e, self.TARGET_EXPLOITABILITY)
+                self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
                 self.assertAlmostEqual(v, value, delta=self.DELTA)
-                self.assertEqual(e.dtype, dtype)
+                self.assertEqual(epsilon.dtype, dtype)
                 self.assertEqual(v.dtype, dtype)
@@ -141,12 +142,12 @@ class SequenceFormPolytopeRegretMinimizationTestCase(TestCase):
                     target_exploitability=self.TARGET_EXPLOITABILITY,
                     progress_bar=False,
                 )
-                e = game.exploitability(x_bar, y_bar)
+                epsilon = game.exploitability(x_bar, y_bar)
                 v = game.expected_row_utility(x_bar, y_bar)
-                self.assertLess(e, self.TARGET_EXPLOITABILITY)
+                self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
                 self.assertAlmostEqual(v, value, delta=self.DELTA)
-                self.assertEqual(e.dtype, dtype)
+                self.assertEqual(epsilon.dtype, dtype)
                 self.assertEqual(v.dtype, dtype)
@@ -172,7 +173,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
                 nr.CFR(self.KER, game.column_sequence_form_polytope),
                 progress_bar=False,
             )
-            e = game.exploitability(x_bar, y_bar)
+            epsilon = game.exploitability(x_bar, y_bar)
             v = game.expected_row_utility(x_bar, y_bar)
             x_bar2, y_bar2 = nr.rm(
                 game,
@@ -180,10 +181,10 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
                 nr.CFR2(self.KER, game.column_sequence_form_polytope),
                 progress_bar=False,
             )
-            e2 = game.exploitability(x_bar2, y_bar2)
+            epsilon2 = game.exploitability(x_bar2, y_bar2)
             v2 = game.expected_row_utility(x_bar2, y_bar2)
-            self.assertAlmostEqual(e, e2, self.PLACES)
+            self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
             self.assertAlmostEqual(v, v2, self.PLACES)
             x_bar, y_bar = nr.rm(
@@ -193,7 +194,7 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
                 prediction=True,
                 progress_bar=False,
             )
-            e = game.exploitability(x_bar, y_bar)
+            epsilon = game.exploitability(x_bar, y_bar)
             v = game.expected_row_utility(x_bar, y_bar)
             x_bar2, y_bar2 = nr.rm(
                 game,
@@ -202,12 +203,61 @@ class SequenceFormPolytopeRegretMinimization2TestCase(TestCase):
                 prediction=True,
                 progress_bar=False,
             )
-            e2 = game.exploitability(x_bar2, y_bar2)
+            epsilon2 = game.exploitability(x_bar2, y_bar2)
             v2 = game.expected_row_utility(x_bar2, y_bar2)
-            self.assertAlmostEqual(e, e2, self.PLACES)
+            self.assertAlmostEqual(epsilon, epsilon2, self.PLACES)
             self.assertAlmostEqual(v, v2, self.PLACES)
+class StochasticRegretMinimizationTestCase(TestCase):
+    KER = nr.FPKer()
+    GAME = nr.open_spiel_game(KER, 'kuhn_poker')
+    SAMPLE_COUNT = 100000
+    TARGET_EXPLOITABILITY = 1e-1
+    SEED = 42
+    def test_external_sampling(self):
+        assert self.GAME.is_two_player and self.GAME.is_zero_sum
+        seed(self.SEED)
+        R = nr.MCCFR(self.KER, self.GAME)
+        sigma = nr.stochastic_rm(
+            self.GAME,
+            R,
+            alternation=True,
+            sample_count=self.SAMPLE_COUNT,
+            progress_bar=False,
+        )
+        epsilon = self.GAME.exploitability(sigma)
+        self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
+    def test_outcome_sampling(self):
+        assert self.GAME.is_two_player and self.GAME.is_zero_sum
+        seed(self.SEED)
+        R = nr.MCCFR(
+            self.KER,
+            self.GAME,
+            reference_strategy_profile=nr.UniformStrategyProfile(
+                self.KER,
+                self.GAME,
+            ),
+        )
+        sigma = nr.stochastic_rm(
+            self.GAME,
+            R,
+            alternation=True,
+            sample_count=self.SAMPLE_COUNT,
+            progress_bar=False,
+        )
+        epsilon = self.GAME.exploitability(sigma)
+        self.assertLess(epsilon, self.TARGET_EXPLOITABILITY)
 if __name__ == '__main__':
     main()  # pragma: no cover

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret/utilities.py RENAMED Viewed

@@ -1,5 +1,6 @@
 """Module for utilities."""
 from importlib import import_module
+from random import choices
 def import_object(object_path):
@@ -34,3 +35,16 @@ def tuple_or_none(values):
     :return: Tuple or ``None``.
     """
     return None if values is None else tuple(values)
+def sample(values, probabilities):
+    """Sample a random value as per the probabilities.
+    >>> sample(range(5), [0, 0, 1, 0, 0])
+    2
+    :param values: Values to be sampled from.
+    :param probabilities: The probabilities of sampling each value.
+    :return: The sampled value.
+    """
+    return choices(values, probabilities)[0]

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: noregret
-Version: 0.0.0.dev8
+Version: 0.0.0.dev9
 Summary: No-regret learning dynamics
 Home-page: https://github.com/uoftcprg/noregret
 Author: Universal, Open, Free, and Transparent Computer Poker Research Group

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/noregret.egg-info/SOURCES.txt RENAMED Viewed

@@ -31,6 +31,7 @@ noregret/regret_minimizers/__init__.py
 noregret/regret_minimizers/probability_simplices.py
 noregret/regret_minimizers/regret_minimizers.py
 noregret/regret_minimizers/sequence_form_polytopes.py
+noregret/regret_minimizers/stochastic.py
 noregret/solvers/__init__.py
 noregret/solvers/linear_programming.py
 noregret/solvers/regret_minimization.py

{noregret-0.0.0.dev8 → noregret-0.0.0.dev9}/setup.py RENAMED Viewed

@@ -4,7 +4,7 @@ from setuptools import find_packages, setup
 setup(
     name='noregret',
-    version='0.0.0.dev8',
+    version='0.0.0.dev9',
     description='No-regret learning dynamics',
     long_description=open('README.rst').read(),
     long_description_content_type='text/x-rst',