PyPI - gym-csle-stopping-game - Versions diffs - 0.6.3__tar.gz → 0.6.5__tar.gz - Mend

gym-csle-stopping-game 0.6.3 (tar.gz) → 0.6.5 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gym-csle-stopping-game might be problematic. Click here for more details.

Files changed (31) hide show

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym_csle_stopping_game
-Version: 0.6.3
+Version: 0.6.5
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/setup.cfg RENAMED Viewed

@@ -20,11 +20,11 @@ classifiers =
 [options]
 install_requires =
 	gymnasium>=0.27.1
-	csle-base>=0.6.3
-	csle-common>=0.6.3
-	csle-attacker>=0.6.3
-	csle-defender>=0.6.3
-	csle-collector>=0.6.3
+	csle-base>=0.6.5
+	csle-common>=0.6.5
+	csle-attacker>=0.6.5
+	csle-defender>=0.6.5
+	csle-collector>=0.6.5
 python_requires = >=3.8
 package_dir =
 	=src

gym_csle_stopping_game-0.6.5/src/gym_csle_stopping_game/__version__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = '0.6.5'

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/src/gym_csle_stopping_game/dao/stopping_game_config.py RENAMED Viewed

@@ -14,7 +14,7 @@ class StoppingGameConfig(SimulationEnvInputConfig):
                  T: npt.NDArray[Any], O: npt.NDArray[np.int_], Z: npt.NDArray[Any],
                  R: npt.NDArray[Any], S: npt.NDArray[np.int_], A1: npt.NDArray[np.int_],
                  A2: npt.NDArray[np.int_], L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int,
-                 b1: npt.NDArray[np.float_],
+                 b1: npt.NDArray[np.float64],
                  save_dir: str, checkpoint_traces_freq: int, gamma: float = 1, compute_beliefs: bool = True,
                  save_trace: bool = True) -> None:
         """

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/src/gym_csle_stopping_game/dao/stopping_game_state.py RENAMED Viewed

@@ -10,7 +10,7 @@ class StoppingGameState(JSONSerializable):
     Represents the state of the optimal stopping game
     """
-    def __init__(self, b1: npt.NDArray[np.float_], L: int) -> None:
+    def __init__(self, b1: npt.NDArray[np.float64], L: int) -> None:
         """
         Intializes the state

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/src/gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py RENAMED Viewed

@@ -48,7 +48,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.reset()
         super().__init__()
-    def step(self, pi2: Union[npt.NDArray[Any], int, float, np.int_, np.float_]) \
+    def step(self, pi2: Union[npt.NDArray[Any], int, float, np.int_, np.float64]) \
             -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/src/gym_csle_stopping_game/util/stopping_game_util.py RENAMED Viewed

@@ -11,7 +11,7 @@ class StoppingGameUtil:
     """
     @staticmethod
-    def b1() -> npt.NDArray[np.float_]:
+    def b1() -> npt.NDArray[np.float64]:
         """
         Gets the initial belief
@@ -233,7 +233,7 @@ class StoppingGameUtil:
         return int(np.random.choice(np.arange(0, len(S)), p=state_probs))
     @staticmethod
-    def sample_initial_state(b1: npt.NDArray[np.float_]) -> int:
+    def sample_initial_state(b1: npt.NDArray[np.float64]) -> int:
         """
         Samples the initial state
@@ -264,7 +264,7 @@ class StoppingGameUtil:
         return int(o)
     @staticmethod
-    def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any], l: int,
+    def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float64], pi2: npt.NDArray[Any], l: int,
                      config: StoppingGameConfig) -> float:
         """
         A Bayesian filter to compute the belief of player 1
@@ -302,8 +302,8 @@ class StoppingGameUtil:
         return float(b_prime_s_prime)
     @staticmethod
-    def next_belief(o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any],
-                    config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float_]:
+    def next_belief(o: int, a1: int, b: npt.NDArray[np.float64], pi2: npt.NDArray[Any],
+                    config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float64]:
         """
         Computes the next belief using a Bayesian filter
@@ -337,3 +337,52 @@ class StoppingGameUtil:
         :return: a2 is the attacker action
         """
         return int(np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s]))
+    @staticmethod
+    def pomdp_solver_file(config: StoppingGameConfig, discount_factor: float, pi2: npt.NDArray[Any]) -> str:
+        """
+        Gets the POMDP environment specification based on the format at http://www.pomdp.org/code/index.html,
+        for the defender's local problem against a static attacker
+        :param config: the POMDP config
+        :param discount_factor: the discount factor
+        :param pi2: the attacker strategy
+        :return: the file content as a string
+        """
+        file_str = ""
+        file_str = file_str + f"discount: {discount_factor}\n\n"
+        file_str = file_str + "values: reward\n\n"
+        file_str = file_str + f"states: {len(config.S)}\n\n"
+        file_str = file_str + f"actions: {len(config.A1)}\n\n"
+        file_str = file_str + f"observations: {len(config.O)}\n\n"
+        initial_belief_str = " ".join(list(map(lambda x: str(x), config.b1)))
+        file_str = file_str + f"start: {initial_belief_str}\n\n\n"
+        num_transitions = 0
+        for s in config.S:
+            for a1 in config.A1:
+                probs = []
+                for s_prime in range(len(config.S)):
+                    num_transitions += 1
+                    prob = 0
+                    for a2 in config.A2:
+                        prob += config.T[0][a1][a2][s][s_prime] * pi2[s][a2]
+                    file_str = file_str + f"T: {a1} : {s} : {s_prime} {prob:.80f}\n"
+                    probs.append(prob)
+                assert round(sum(probs), 3) == 1
+        file_str = file_str + "\n\n"
+        for a1 in config.A1:
+            for s_prime in config.S:
+                probs = []
+                for o in range(len(config.O)):
+                    prob = config.Z[0][0][s_prime][o]
+                    file_str = file_str + f"O : {a1} : {s_prime} : {o} {prob:.80f}\n"
+                    probs.append(prob)
+                assert round(sum(probs), 3) == 1
+        file_str = file_str + "\n\n"
+        for s in config.S:
+            for a1 in config.A1:
+                for s_prime in config.S:
+                    for o in config.O:
+                        r = config.R[0][a1][0][s]
+                        file_str = file_str + f"R: {a1} : {s} : {s_prime} : {o} {r:.80f}\n"
+        return file_str

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/src/gym_csle_stopping_game.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-csle-stopping-game
-Version: 0.6.3
+Version: 0.6.5
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/src/gym_csle_stopping_game.egg-info/requires.txt RENAMED Viewed

@@ -1,9 +1,9 @@
 gymnasium>=0.27.1
-csle-base>=0.6.3
-csle-common>=0.6.3
-csle-attacker>=0.6.3
-csle-defender>=0.6.3
-csle-collector>=0.6.3
+csle-base>=0.6.5
+csle-common>=0.6.5
+csle-attacker>=0.6.5
+csle-defender>=0.6.5
+csle-collector>=0.6.5
 [testing]
 pytest>=6.0

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/tests/test_stopping_game_env.py RENAMED Viewed

@@ -3,6 +3,7 @@ import pytest
 from unittest.mock import patch, MagicMock
 from gymnasium.spaces import Box, Discrete
 import numpy as np
+from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
 from gym_csle_stopping_game.dao.stopping_game_state import StoppingGameState
@@ -23,19 +24,19 @@ class TestStoppingGameEnvSuite:
         :return: None
         """
         env_name = "test_env"
-        T = np.array([[[0.1, 0.9], [0.4, 0.6]], [[0.7, 0.3], [0.2, 0.8]]])
-        O = np.array([0, 1])
-        Z = np.array([[[0.8, 0.2], [0.5, 0.5]], [[0.4, 0.6], [0.9, 0.1]]])
+        T = StoppingGameUtil.transition_tensor(L=3, p=0)
+        O = StoppingGameUtil.observation_space(n=100)
+        Z = StoppingGameUtil.observation_tensor(n=100)
         R = np.zeros((2, 3, 3, 3))
-        S = np.array([0, 1, 2])
-        A1 = np.array([0, 1, 2])
-        A2 = np.array([0, 1, 2])
+        S = StoppingGameUtil.state_space()
+        A1 = StoppingGameUtil.defender_actions()
+        A2 = StoppingGameUtil.attacker_actions()
         L = 2
         R_INT = 1
         R_COST = 2
         R_SLA = 3
         R_ST = 4
-        b1 = np.array([0.6, 0.4])
+        b1 = StoppingGameUtil.b1()
         save_dir = "save_directory"
         checkpoint_traces_freq = 100
         gamma = 0.9
@@ -69,12 +70,12 @@ class TestStoppingGameEnvSuite:
         :return: None
         """
-        T = np.array([[[0.1, 0.9], [0.4, 0.6]], [[0.7, 0.3], [0.2, 0.8]]])
-        O = np.array([0, 1])
-        A1 = np.array([0, 1, 2])
-        A2 = np.array([0, 1, 2])
+        T = StoppingGameUtil.transition_tensor(L=3, p=0)
+        O = StoppingGameUtil.observation_space(n=100)
+        A1 = StoppingGameUtil.defender_actions()
+        A2 = StoppingGameUtil.attacker_actions()
         L = 2
-        b1 = np.array([0.6, 0.4])
+        b1 = StoppingGameUtil.b1()
         attacker_observation_space = Box(
             low=np.array([0.0, 0.0, 0.0]),
             high=np.array([float(L), 1.0, 2.0]),
@@ -304,7 +305,7 @@ class TestStoppingGameEnvSuite:
         assert not env.is_state_terminal(state_tuple)
         with pytest.raises(ValueError):
-            env.is_state_terminal([1, 2, 3]) # type: ignore
+            env.is_state_terminal([1, 2, 3])  # type: ignore
     def test_get_observation_from_history(self) -> None:
         """
@@ -346,26 +347,6 @@ class TestStoppingGameEnvSuite:
         :return: None
         """
         env = StoppingGameEnv(self.config)
-        env.state = MagicMock()
-        env.state.s = 1
-        env.state.l = 2
-        env.state.t = 0
-        env.state.attacker_observation.return_value = np.array([1, 2, 3])
-        env.state.defender_observation.return_value = np.array([4, 5, 6])
-        env.state.b = np.array([0.5, 0.5, 0.0])
-        env.trace = MagicMock()
-        env.trace.defender_rewards = []
-        env.trace.attacker_rewards = []
-        env.trace.attacker_actions = []
-        env.trace.defender_actions = []
-        env.trace.infos = []
-        env.trace.states = []
-        env.trace.beliefs = []
-        env.trace.infrastructure_metrics = []
-        env.trace.attacker_observations = []
-        env.trace.defender_observations = []
         with patch("gym_csle_stopping_game.util.stopping_game_util.StoppingGameUtil.sample_next_state",
                    return_value=2):
             with patch("gym_csle_stopping_game.util.stopping_game_util.StoppingGameUtil.sample_next_observation",
@@ -376,7 +357,7 @@ class TestStoppingGameEnvSuite:
                         1,
                         (
                             np.array(
-                                [[0.2, 0.8, 0.0], [0.6, 0.4, 0.0], [0.5, 0.5, 0.0]]
+                                [[0.2, 0.8], [0.6, 0.4], [0.5, 0.5]]
                             ),
                             2,
                         ),
@@ -384,24 +365,12 @@ class TestStoppingGameEnvSuite:
                     observations, rewards, terminated, truncated, info = env.step(
                         action_profile
                     )
-                    assert (observations[0] == np.array([4, 5, 6])).all(), "Incorrect defender observations"
-                    assert (observations[1] == np.array([1, 2, 3])).all(), "Incorrect attacker observations"
+                    assert observations[0].all() == np.array([1, 0.7]).all(), "Incorrect defender observations"
+                    assert observations[1].all() == np.array([1, 2, 3]).all(), "Incorrect attacker observations"
                     assert rewards == (0, 0)
                     assert not terminated
                     assert not truncated
-                    assert env.trace.defender_rewards[-1] == 0
-                    assert env.trace.attacker_rewards[-1] == 0
-                    assert env.trace.attacker_actions[-1] == 2
-                    assert env.trace.defender_actions[-1] == 1
-                    assert env.trace.infos[-1] == info
-                    assert env.trace.states[-1] == 2
-                    print(env.trace.beliefs)
-                    assert env.trace.beliefs[-1] == 0.7
-                    assert env.trace.infrastructure_metrics[-1] == 1
-                    assert (env.trace.attacker_observations[-1] == np.array([1, 2, 3])).all()
-                    assert (env.trace.defender_observations[-1] == np.array([4, 5, 6])).all()
     def test_info(self) -> None:
         """
         Tests the function of adding the cumulative reward and episode length to the info dict

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/tests/test_stopping_game_mdp_attacker_env.py RENAMED Viewed

@@ -5,8 +5,12 @@ from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
 from gym_csle_stopping_game.dao.stopping_game_attacker_mdp_config import (
     StoppingGameAttackerMdpConfig,
 )
+from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 from csle_common.dao.training.policy import Policy
+from csle_common.dao.training.random_policy import RandomPolicy
+from csle_common.dao.training.player_type import PlayerType
+from csle_common.dao.simulation_config.action import Action
 import pytest
 from unittest.mock import MagicMock
 import numpy as np
@@ -25,19 +29,19 @@ class TestStoppingGameMdpAttackerEnvSuite:
         :return: None
         """
         env_name = "test_env"
-        T = np.array([[[0.1, 0.9], [0.4, 0.6]], [[0.7, 0.3], [0.2, 0.8]]])
-        O = np.array([0, 1])
-        Z = np.array([[[0.8, 0.2], [0.5, 0.5]], [[0.4, 0.6], [0.9, 0.1]]])
+        T = StoppingGameUtil.transition_tensor(L=3, p=0)
+        O = StoppingGameUtil.observation_space(n=100)
+        Z = StoppingGameUtil.observation_tensor(n=100)
         R = np.zeros((2, 3, 3, 3))
-        S = np.array([0, 1, 2])
-        A1 = np.array([0, 1, 2])
-        A2 = np.array([0, 1, 2])
+        S = StoppingGameUtil.state_space()
+        A1 = StoppingGameUtil.defender_actions()
+        A2 = StoppingGameUtil.attacker_actions()
         L = 2
         R_INT = 1
         R_COST = 2
         R_SLA = 3
         R_ST = 4
-        b1 = np.array([0.6, 0.4])
+        b1 = StoppingGameUtil.b1()
         save_dir = "save_directory"
         checkpoint_traces_freq = 100
         gamma = 0.9
@@ -107,9 +111,8 @@ class TestStoppingGameMdpAttackerEnvSuite:
         )
         env = StoppingGameMdpAttackerEnv(config=attacker_mdp_config)
-        attacker_obs, info = env.reset()
-        assert env.latest_defender_obs.all() == np.array([2, 0.4]).all() # type: ignore
-        assert info == {}
+        info = env.reset()
+        assert info[-1] == {}
     def test_set_model(self) -> None:
         """
@@ -144,7 +147,7 @@ class TestStoppingGameMdpAttackerEnvSuite:
         )
         env = StoppingGameMdpAttackerEnv(config=attacker_mdp_config)
-        assert not env.set_state(1) # type: ignore
+        assert not env.set_state(1)  # type: ignore
     def test_calculate_stage_policy(self) -> None:
         """
@@ -190,7 +193,7 @@ class TestStoppingGameMdpAttackerEnvSuite:
     def test_render(self) -> None:
         """
         Tests the function for rendering the environment
         :return: None
         """
         defender_strategy = MagicMock(spec=Policy)
@@ -317,7 +320,7 @@ class TestStoppingGameMdpAttackerEnvSuite:
         particles = [1, 2, 3]
         t = 0
         observation = 0
-        expected_actions = [0, 1, 2]
+        expected_actions = [0, 1]
         assert (
             env.get_actions_from_particles(particles, t, observation)
             == expected_actions
@@ -326,18 +329,32 @@ class TestStoppingGameMdpAttackerEnvSuite:
     def test_step(self) -> None:
         """
         Tests the function for taking a step in the environment by executing the given action
         :return: None
         """
-        defender_strategy = MagicMock(spec=Policy)
+        defender_stage_strategy = np.zeros((3, 2))
+        defender_stage_strategy[0][0] = 0.9
+        defender_stage_strategy[0][1] = 0.1
+        defender_stage_strategy[1][0] = 0.9
+        defender_stage_strategy[1][1] = 0.1
+        defender_actions = list(map(lambda x: Action(id=x, descr=""), self.config.A1))
+        defender_strategy = RandomPolicy(
+            actions=defender_actions,
+            player_type=PlayerType.DEFENDER,
+            stage_policy_tensor=list(defender_stage_strategy),
+        )
         attacker_mdp_config = StoppingGameAttackerMdpConfig(
             env_name="test_env",
             stopping_game_config=self.config,
             defender_strategy=defender_strategy,
             stopping_game_name="csle-stopping-game-v1",
         )
         env = StoppingGameMdpAttackerEnv(config=attacker_mdp_config)
-        pi2 = np.array([[0.5, 0.5]])
-        with pytest.raises(AssertionError):
-            env.step(pi2)
+        env.reset()
+        pi2 = env.calculate_stage_policy(o=list(env.latest_attacker_obs), a2=0)  # type: ignore
+        attacker_obs, reward, terminated, truncated, info = env.step(pi2)
+        assert isinstance(attacker_obs[0], float)  # type: ignore
+        assert isinstance(terminated, bool)  # type: ignore
+        assert isinstance(truncated, bool)  # type: ignore
+        assert isinstance(reward, float)  # type: ignore
+        assert isinstance(info, dict)  # type: ignore

{gym_csle_stopping_game-0.6.3 → gym_csle_stopping_game-0.6.5}/tests/test_stopping_game_pomdp_defender_env.py RENAMED Viewed

@@ -1,9 +1,14 @@
-from gym_csle_stopping_game.envs.stopping_game_pomdp_defender_env import StoppingGamePomdpDefenderEnv
+from gym_csle_stopping_game.envs.stopping_game_pomdp_defender_env import (
+    StoppingGamePomdpDefenderEnv,
+)
 from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
-from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
+from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import (
+    StoppingGameDefenderPomdpConfig,
+)
 from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from csle_common.dao.training.policy import Policy
+from csle_common.dao.simulation_config.action import Action
 from csle_common.dao.training.random_policy import RandomPolicy
 from csle_common.dao.training.player_type import PlayerType
 import pytest
@@ -219,7 +224,7 @@ class TestStoppingGamePomdpDefenderEnvSuite:
             stopping_game_name="csle-stopping-game-v1",
         )
         env = StoppingGamePomdpDefenderEnv(config=defender_pomdp_config)
-        assert env.set_state(1) is None # type: ignore
+        assert env.set_state(1) is None  # type: ignore
     def test_get_observation_from_history(self) -> None:
         """
@@ -301,7 +306,10 @@ class TestStoppingGamePomdpDefenderEnvSuite:
         t = 0
         observation = 0
         expected_actions = [0, 1]
-        assert env.get_actions_from_particles(particles, t, observation) == expected_actions
+        assert (
+            env.get_actions_from_particles(particles, t, observation)
+            == expected_actions
+        )
     def test_step(self) -> None:
         """
@@ -315,8 +323,12 @@ class TestStoppingGamePomdpDefenderEnvSuite:
         attacker_stage_strategy[1][0] = 0.9
         attacker_stage_strategy[1][1] = 0.1
         attacker_stage_strategy[2] = attacker_stage_strategy[1]
-        attacker_strategy = RandomPolicy(actions=list(self.config.A2), player_type=PlayerType.ATTACKER,
-                                         stage_policy_tensor=list(attacker_stage_strategy))
+        attacker_actions = list(map(lambda x: Action(id=x, descr=""), self.config.A2))
+        attacker_strategy = RandomPolicy(
+            actions=attacker_actions,
+            player_type=PlayerType.ATTACKER,
+            stage_policy_tensor=list(attacker_stage_strategy),
+        )
         defender_pomdp_config = StoppingGameDefenderPomdpConfig(
             env_name="test_env",
             stopping_game_config=self.config,
@@ -328,9 +340,9 @@ class TestStoppingGamePomdpDefenderEnvSuite:
         env.reset()
         defender_obs, reward, terminated, truncated, info = env.step(a1)
         assert len(defender_obs) == 2
-        assert isinstance(defender_obs[0], float) # type: ignore
-        assert isinstance(defender_obs[1], float) # type: ignore
-        assert isinstance(reward, float) # type: ignore
-        assert isinstance(terminated, bool) # type: ignore
-        assert isinstance(truncated, bool) # type: ignore
-        assert isinstance(info, dict) # type: ignore
+        assert isinstance(defender_obs[0], float)  # type: ignore
+        assert isinstance(defender_obs[1], float)  # type: ignore
+        assert isinstance(reward, float)  # type: ignore
+        assert isinstance(terminated, bool)  # type: ignore
+        assert isinstance(truncated, bool)  # type: ignore
+        assert isinstance(info, dict)  # type: ignore