PyPI - gym-csle-stopping-game - Versions diffs - 0.6.2__tar.gz → 0.6.4__tar.gz - Mend

gym-csle-stopping-game 0.6.2.tar.gz → 0.6.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gym-csle-stopping-game might be problematic. Click here for more details.

Files changed (31) hide show

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym_csle_stopping_game
-Version: 0.6.2
+Version: 0.6.4
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/setup.cfg RENAMED Viewed

@@ -20,11 +20,11 @@ classifiers =
 [options]
 install_requires =
 	gymnasium>=0.27.1
-	csle-base>=0.6.2
-	csle-common>=0.6.2
-	csle-attacker>=0.6.2
-	csle-defender>=0.6.2
-	csle-collector>=0.6.2
+	csle-base>=0.6.4
+	csle-common>=0.6.4
+	csle-attacker>=0.6.4
+	csle-defender>=0.6.4
+	csle-collector>=0.6.4
 python_requires = >=3.8
 package_dir =
 	=src

gym_csle_stopping_game-0.6.4/src/gym_csle_stopping_game/__version__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = '0.6.4'

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/src/gym_csle_stopping_game/envs/stopping_game_env.py RENAMED Viewed

@@ -7,24 +7,10 @@ import math
 import csle_common.constants.constants as constants
 from csle_common.dao.simulation_config.base_env import BaseEnv
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
-from csle_common.dao.training.policy import Policy
-from csle_common.dao.emulation_config.emulation_env_state import EmulationEnvState
-from csle_common.dao.emulation_config.emulation_env_config import EmulationEnvConfig
-from csle_common.dao.simulation_config.simulation_env_config import SimulationEnvConfig
-from csle_common.dao.emulation_config.emulation_simulation_trace import EmulationSimulationTrace
-from csle_common.dao.emulation_action.attacker.emulation_attacker_stopping_actions \
-    import EmulationAttackerStoppingActions
-from csle_common.dao.emulation_action.attacker.emulation_attacker_action import EmulationAttackerAction
-from csle_common.dao.emulation_action.defender.emulation_defender_stopping_actions \
-    import EmulationDefenderStoppingActions
-from csle_common.metastore.metastore_facade import MetastoreFacade
-from csle_common.logging.log import Logger
-from csle_system_identification.emulator import Emulator
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
 from gym_csle_stopping_game.dao.stopping_game_state import StoppingGameState
 import gym_csle_stopping_game.constants.constants as env_constants
-from csle_common.dao.emulation_config.emulation_trace import EmulationTrace
 class StoppingGameEnv(BaseEnv):
@@ -244,95 +230,6 @@ class StoppingGameEnv(BaseEnv):
         info[env_constants.ENV_METRICS.TIME_STEP] = self.state.t
         return (defender_obs, attacker_obs), info
-    @staticmethod
-    def emulation_evaluation(env: "StoppingGameEnv", n_episodes: int, intrusion_seq: List[EmulationAttackerAction],
-                             defender_policy: Policy,
-                             attacker_policy: Policy,
-                             emulation_env_config: EmulationEnvConfig,
-                             simulation_env_config: SimulationEnvConfig
-                             ) -> List[EmulationSimulationTrace]:
-        """
-        Utility function for evaluating a strategy profile in the emulation environment
-        :param env: the environment to use for evaluation
-        :param n_episodes: the number of evaluation episodes
-        :param intrusion_seq: the intrusion sequence for the evaluation (sequence of attacker actions)
-        :param defender_policy: the defender policy for the evaluation
-        :param attacker_policy: the attacker policy for the evaluation
-        :param emulation_env_config: configuration of the emulation environment for the evaluation
-        :param simulation_env_config: configuration of the simulation environment for the evaluation
-        :return: traces with the evaluation results
-        """
-        logger = Logger.__call__().get_logger()
-        traces = []
-        s = EmulationEnvState(emulation_env_config=emulation_env_config)
-        s.initialize_defender_machines()
-        for i in range(n_episodes):
-            done = False
-            defender_obs_space = simulation_env_config.joint_observation_space_config.observation_spaces[0]
-            b = env.state.b1
-            o, _ = env.reset()
-            (d_obs, a_obs) = o
-            t = 0
-            s.reset()
-            emulation_trace = EmulationTrace(initial_attacker_observation_state=s.attacker_obs_state,
-                                             initial_defender_observation_state=s.defender_obs_state,
-                                             emulation_name=emulation_env_config.name)
-            simulation_trace = SimulationTrace(simulation_env=env.config.env_name)
-            while not done:
-                a1 = defender_policy.action(d_obs)
-                a2 = attacker_policy.action(a_obs)
-                o, r, done, info, _ = env.step((a1, a2))
-                (d_obs, a_obs) = o
-                r_1, r_2 = r
-                logger.debug(f"a1:{a1}, a2:{a2}, d_obs:{d_obs}, a_obs:{a_obs}, r:{r}, done:{done}, info: {info}")
-                if a1 == 0:
-                    defender_action = EmulationDefenderStoppingActions.CONTINUE(index=-1)
-                else:
-                    defender_action = EmulationDefenderStoppingActions.CONTINUE(index=-1)
-                if env.state.s == 1:
-                    if t >= len(intrusion_seq):
-                        t = 0
-                    attacker_action = intrusion_seq[t]
-                else:
-                    attacker_action = EmulationAttackerStoppingActions.CONTINUE(index=-1)
-                emulation_trace, s = Emulator.run_actions(
-                    s=s,
-                    emulation_env_config=emulation_env_config, attacker_action=attacker_action,
-                    defender_action=defender_action, trace=emulation_trace,
-                    sleep_time=emulation_env_config.kafka_config.time_step_len_seconds)
-                o_components = [s.defender_obs_state.snort_ids_alert_counters.severe_alerts,
-                                s.defender_obs_state.snort_ids_alert_counters.warning_alerts,
-                                s.defender_obs_state.aggregated_host_metrics.num_failed_login_attempts]
-                o_components_str = ",".join(list(map(lambda x: str(x), o_components)))
-                logger.debug(f"o_components:{o_components}")
-                logger.debug(f"observation_id_to_observation_vector_inv:"
-                             f"{defender_obs_space.observation_id_to_observation_vector_inv}")
-                logger.debug(f"observation_id_to_observation_vector_inv:"
-                             f"{o_components_str in defender_obs_space.observation_id_to_observation_vector_inv}")
-                emulation_o = 0
-                if o_components_str in defender_obs_space.observation_id_to_observation_vector_inv:
-                    emulation_o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
-                logger.debug(f"o:{emulation_o}")
-                b = StoppingGameUtil.next_belief(o=emulation_o, a1=a1, b=b, pi2=a2, config=env.config,
-                                                 l=env.state.l, a2=a2)
-                d_obs[1] = b[1]
-                a_obs[1] = b[1]
-                logger.debug(f"b:{b}")
-                simulation_trace.defender_rewards.append(r_1)
-                simulation_trace.attacker_rewards.append(r_2)
-                simulation_trace.attacker_actions.append(a2)
-                simulation_trace.defender_actions.append(a1)
-                simulation_trace.infos.append(info)
-                simulation_trace.states.append(s)
-                simulation_trace.beliefs.append(b[1])
-                simulation_trace.infrastructure_metrics.append(emulation_o)
-            em_sim_trace = EmulationSimulationTrace(emulation_trace=emulation_trace, simulation_trace=simulation_trace)
-            MetastoreFacade.save_emulation_simulation_trace(em_sim_trace)
-            traces.append(em_sim_trace)
-        return traces
     def render(self, mode: str = 'human'):
         """
         Renders the environment.  Supported rendering modes: (1) human; and (2) rgb_array

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/src/gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py RENAMED Viewed

@@ -4,12 +4,7 @@ import numpy.typing as npt
 from csle_common.dao.simulation_config.base_env import BaseEnv
 from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
-from csle_common.dao.training.policy import Policy
-from csle_common.dao.emulation_config.emulation_env_config import EmulationEnvConfig
-from csle_common.dao.simulation_config.simulation_env_config import SimulationEnvConfig
-from csle_common.dao.emulation_config.emulation_simulation_trace import EmulationSimulationTrace
 from csle_common.dao.emulation_config.emulation_trace import EmulationTrace
-from csle_common.dao.emulation_action.attacker.emulation_attacker_action import EmulationAttackerAction
 from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
@@ -103,28 +98,6 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         defender_obs = o[0]
         return defender_obs, r[0], d, info
-    @staticmethod
-    def emulation_evaluation(env: "StoppingGamePomdpDefenderEnv",
-                             n_episodes: int, intrusion_seq: List[EmulationAttackerAction],
-                             defender_policy: Policy,
-                             emulation_env_config: EmulationEnvConfig, simulation_env_config: SimulationEnvConfig) \
-            -> List[EmulationSimulationTrace]:
-        """
-        Utility function for evaluating policies in the emulation environment
-        :param env: the environment to use for evaluation
-        :param n_episodes: the number of episodes to use for evaluation
-        :param intrusion_seq: the sequence of intrusion actions to use for evaluation
-        :param defender_policy: the defender policy to use for evaluation
-        :param emulation_env_config: the configuration of the emulation environment to use for evaluation
-        :param simulation_env_config: the configuration of the simulation environment to use for evaluation
-        :return: traces with the evaluation results
-        """
-        return StoppingGameEnv.emulation_evaluation(
-            env=env.stopping_game_env, n_episodes=n_episodes, intrusion_seq=intrusion_seq,
-            defender_policy=defender_policy, attacker_policy=env.static_attacker_strategy,
-            emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config)
     def is_defense_action_legal(self, defense_action_id: int) -> bool:
         """
         Checks whether a defender action in the environment is legal or not

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/src/gym_csle_stopping_game.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-csle-stopping-game
-Version: 0.6.2
+Version: 0.6.4
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/src/gym_csle_stopping_game.egg-info/requires.txt RENAMED Viewed

@@ -1,9 +1,9 @@
 gymnasium>=0.27.1
-csle-base>=0.6.2
-csle-common>=0.6.2
-csle-attacker>=0.6.2
-csle-defender>=0.6.2
-csle-collector>=0.6.2
+csle-base>=0.6.4
+csle-common>=0.6.4
+csle-attacker>=0.6.4
+csle-defender>=0.6.4
+csle-collector>=0.6.4
 [testing]
 pytest>=6.0

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/tests/test_stopping_game_env.py RENAMED Viewed

@@ -1,8 +1,9 @@
 from typing import Dict, Any
 import pytest
 from unittest.mock import patch, MagicMock
-from gym.spaces import Box, Discrete
+from gymnasium.spaces import Box, Discrete
 import numpy as np
+from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
 from gym_csle_stopping_game.dao.stopping_game_state import StoppingGameState
@@ -23,19 +24,19 @@ class TestStoppingGameEnvSuite:
         :return: None
         """
         env_name = "test_env"
-        T = np.array([[[0.1, 0.9], [0.4, 0.6]], [[0.7, 0.3], [0.2, 0.8]]])
-        O = np.array([0, 1])
-        Z = np.array([[[0.8, 0.2], [0.5, 0.5]], [[0.4, 0.6], [0.9, 0.1]]])
+        T = StoppingGameUtil.transition_tensor(L=3, p=0)
+        O = StoppingGameUtil.observation_space(n=100)
+        Z = StoppingGameUtil.observation_tensor(n=100)
         R = np.zeros((2, 3, 3, 3))
-        S = np.array([0, 1, 2])
-        A1 = np.array([0, 1, 2])
-        A2 = np.array([0, 1, 2])
+        S = StoppingGameUtil.state_space()
+        A1 = StoppingGameUtil.defender_actions()
+        A2 = StoppingGameUtil.attacker_actions()
         L = 2
         R_INT = 1
         R_COST = 2
         R_SLA = 3
         R_ST = 4
-        b1 = np.array([0.6, 0.4])
+        b1 = StoppingGameUtil.b1()
         save_dir = "save_directory"
         checkpoint_traces_freq = 100
         gamma = 0.9
@@ -69,12 +70,12 @@ class TestStoppingGameEnvSuite:
         :return: None
         """
-        T = np.array([[[0.1, 0.9], [0.4, 0.6]], [[0.7, 0.3], [0.2, 0.8]]])
-        O = np.array([0, 1])
-        A1 = np.array([0, 1, 2])
-        A2 = np.array([0, 1, 2])
+        T = StoppingGameUtil.transition_tensor(L=3, p=0)
+        O = StoppingGameUtil.observation_space(n=100)
+        A1 = StoppingGameUtil.defender_actions()
+        A2 = StoppingGameUtil.attacker_actions()
         L = 2
-        b1 = np.array([0.6, 0.4])
+        b1 = StoppingGameUtil.b1()
         attacker_observation_space = Box(
             low=np.array([0.0, 0.0, 0.0]),
             high=np.array([float(L), 1.0, 2.0]),
@@ -304,7 +305,7 @@ class TestStoppingGameEnvSuite:
         assert not env.is_state_terminal(state_tuple)
         with pytest.raises(ValueError):
-            env.is_state_terminal([1, 2, 3]) # type: ignore
+            env.is_state_terminal([1, 2, 3])  # type: ignore
     def test_get_observation_from_history(self) -> None:
         """
@@ -346,26 +347,6 @@ class TestStoppingGameEnvSuite:
         :return: None
         """
         env = StoppingGameEnv(self.config)
-        env.state = MagicMock()
-        env.state.s = 1
-        env.state.l = 2
-        env.state.t = 0
-        env.state.attacker_observation.return_value = np.array([1, 2, 3])
-        env.state.defender_observation.return_value = np.array([4, 5, 6])
-        env.state.b = np.array([0.5, 0.5, 0.0])
-        env.trace = MagicMock()
-        env.trace.defender_rewards = []
-        env.trace.attacker_rewards = []
-        env.trace.attacker_actions = []
-        env.trace.defender_actions = []
-        env.trace.infos = []
-        env.trace.states = []
-        env.trace.beliefs = []
-        env.trace.infrastructure_metrics = []
-        env.trace.attacker_observations = []
-        env.trace.defender_observations = []
         with patch("gym_csle_stopping_game.util.stopping_game_util.StoppingGameUtil.sample_next_state",
                    return_value=2):
             with patch("gym_csle_stopping_game.util.stopping_game_util.StoppingGameUtil.sample_next_observation",
@@ -376,7 +357,7 @@ class TestStoppingGameEnvSuite:
                         1,
                         (
                             np.array(
-                                [[0.2, 0.8, 0.0], [0.6, 0.4, 0.0], [0.5, 0.5, 0.0]]
+                                [[0.2, 0.8], [0.6, 0.4], [0.5, 0.5], [0.5, 0.5]]
                             ),
                             2,
                         ),
@@ -384,24 +365,12 @@ class TestStoppingGameEnvSuite:
                     observations, rewards, terminated, truncated, info = env.step(
                         action_profile
                     )
-                    assert (observations[0] == np.array([4, 5, 6])).all(), "Incorrect defender observations"
-                    assert (observations[1] == np.array([1, 2, 3])).all(), "Incorrect attacker observations"
+                    assert observations[0].all() == np.array([1, 0.7]).all(), "Incorrect defender observations"
+                    assert observations[1].all() == np.array([1, 2, 3]).all(), "Incorrect attacker observations"
                     assert rewards == (0, 0)
                     assert not terminated
                     assert not truncated
-                    assert env.trace.defender_rewards[-1] == 0
-                    assert env.trace.attacker_rewards[-1] == 0
-                    assert env.trace.attacker_actions[-1] == 2
-                    assert env.trace.defender_actions[-1] == 1
-                    assert env.trace.infos[-1] == info
-                    assert env.trace.states[-1] == 2
-                    print(env.trace.beliefs)
-                    assert env.trace.beliefs[-1] == 0.7
-                    assert env.trace.infrastructure_metrics[-1] == 1
-                    assert (env.trace.attacker_observations[-1] == np.array([1, 2, 3])).all()
-                    assert (env.trace.defender_observations[-1] == np.array([4, 5, 6])).all()
     def test_info(self) -> None:
         """
         Tests the function of adding the cumulative reward and episode length to the info dict
@@ -418,11 +387,3 @@ class TestStoppingGameEnvSuite:
         info: Dict[str, Any] = {}
         updated_info = env._info(info)
         assert updated_info[env_constants.ENV_METRICS.RETURN] == sum(env.trace.defender_rewards)
-    def test_emulation_evaluation(self) -> None:
-        """
-        Tests the function for evaluating a strategy profile in the emulation environment
-        :return: None
-        """
-        StoppingGameEnv(self.config)

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/tests/test_stopping_game_mdp_attacker_env.py RENAMED Viewed

@@ -5,8 +5,12 @@ from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
 from gym_csle_stopping_game.dao.stopping_game_attacker_mdp_config import (
     StoppingGameAttackerMdpConfig,
 )
+from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 from csle_common.dao.training.policy import Policy
+from csle_common.dao.training.random_policy import RandomPolicy
+from csle_common.dao.training.player_type import PlayerType
+from csle_common.dao.simulation_config.action import Action
 import pytest
 from unittest.mock import MagicMock
 import numpy as np
@@ -25,19 +29,19 @@ class TestStoppingGameMdpAttackerEnvSuite:
         :return: None
         """
         env_name = "test_env"
-        T = np.array([[[0.1, 0.9], [0.4, 0.6]], [[0.7, 0.3], [0.2, 0.8]]])
-        O = np.array([0, 1])
-        Z = np.array([[[0.8, 0.2], [0.5, 0.5]], [[0.4, 0.6], [0.9, 0.1]]])
+        T = StoppingGameUtil.transition_tensor(L=3, p=0)
+        O = StoppingGameUtil.observation_space(n=100)
+        Z = StoppingGameUtil.observation_tensor(n=100)
         R = np.zeros((2, 3, 3, 3))
-        S = np.array([0, 1, 2])
-        A1 = np.array([0, 1, 2])
-        A2 = np.array([0, 1, 2])
+        S = StoppingGameUtil.state_space()
+        A1 = StoppingGameUtil.defender_actions()
+        A2 = StoppingGameUtil.attacker_actions()
         L = 2
         R_INT = 1
         R_COST = 2
         R_SLA = 3
         R_ST = 4
-        b1 = np.array([0.6, 0.4])
+        b1 = StoppingGameUtil.b1()
         save_dir = "save_directory"
         checkpoint_traces_freq = 100
         gamma = 0.9
@@ -107,9 +111,8 @@ class TestStoppingGameMdpAttackerEnvSuite:
         )
         env = StoppingGameMdpAttackerEnv(config=attacker_mdp_config)
-        attacker_obs, info = env.reset()
-        assert env.latest_defender_obs.all() == np.array([2, 0.4]).all() # type: ignore
-        assert info == {}
+        info = env.reset()
+        assert info[-1] == {}
     def test_set_model(self) -> None:
         """
@@ -144,7 +147,7 @@ class TestStoppingGameMdpAttackerEnvSuite:
         )
         env = StoppingGameMdpAttackerEnv(config=attacker_mdp_config)
-        assert not env.set_state(1) # type: ignore
+        assert not env.set_state(1)  # type: ignore
     def test_calculate_stage_policy(self) -> None:
         """
@@ -190,7 +193,7 @@ class TestStoppingGameMdpAttackerEnvSuite:
     def test_render(self) -> None:
         """
         Tests the function for rendering the environment
         :return: None
         """
         defender_strategy = MagicMock(spec=Policy)
@@ -317,7 +320,7 @@ class TestStoppingGameMdpAttackerEnvSuite:
         particles = [1, 2, 3]
         t = 0
         observation = 0
-        expected_actions = [0, 1, 2]
+        expected_actions = [0, 1]
         assert (
             env.get_actions_from_particles(particles, t, observation)
             == expected_actions
@@ -326,18 +329,32 @@ class TestStoppingGameMdpAttackerEnvSuite:
     def test_step(self) -> None:
         """
         Tests the function for taking a step in the environment by executing the given action
         :return: None
         """
-        defender_strategy = MagicMock(spec=Policy)
+        defender_stage_strategy = np.zeros((3, 2))
+        defender_stage_strategy[0][0] = 0.9
+        defender_stage_strategy[0][1] = 0.1
+        defender_stage_strategy[1][0] = 0.9
+        defender_stage_strategy[1][1] = 0.1
+        defender_actions = list(map(lambda x: Action(id=x, descr=""), self.config.A1))
+        defender_strategy = RandomPolicy(
+            actions=defender_actions,
+            player_type=PlayerType.DEFENDER,
+            stage_policy_tensor=list(defender_stage_strategy),
+        )
         attacker_mdp_config = StoppingGameAttackerMdpConfig(
             env_name="test_env",
             stopping_game_config=self.config,
             defender_strategy=defender_strategy,
             stopping_game_name="csle-stopping-game-v1",
         )
         env = StoppingGameMdpAttackerEnv(config=attacker_mdp_config)
-        pi2 = np.array([[0.5, 0.5]])
-        with pytest.raises(AssertionError):
-            env.step(pi2)
+        env.reset()
+        pi2 = env.calculate_stage_policy(o=list(env.latest_attacker_obs), a2=0)  # type: ignore
+        attacker_obs, reward, terminated, truncated, info = env.step(pi2)
+        assert isinstance(attacker_obs[0], float)  # type: ignore
+        assert isinstance(terminated, bool)  # type: ignore
+        assert isinstance(truncated, bool)  # type: ignore
+        assert isinstance(reward, float)  # type: ignore
+        assert isinstance(info, dict)  # type: ignore

{gym_csle_stopping_game-0.6.2 → gym_csle_stopping_game-0.6.4}/tests/test_stopping_game_pomdp_defender_env.py RENAMED Viewed

@@ -1,9 +1,14 @@
-from gym_csle_stopping_game.envs.stopping_game_pomdp_defender_env import StoppingGamePomdpDefenderEnv
+from gym_csle_stopping_game.envs.stopping_game_pomdp_defender_env import (
+    StoppingGamePomdpDefenderEnv,
+)
 from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
-from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
+from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import (
+    StoppingGameDefenderPomdpConfig,
+)
 from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from csle_common.dao.training.policy import Policy
+from csle_common.dao.simulation_config.action import Action
 from csle_common.dao.training.random_policy import RandomPolicy
 from csle_common.dao.training.player_type import PlayerType
 import pytest
@@ -219,7 +224,7 @@ class TestStoppingGamePomdpDefenderEnvSuite:
             stopping_game_name="csle-stopping-game-v1",
         )
         env = StoppingGamePomdpDefenderEnv(config=defender_pomdp_config)
-        assert env.set_state(1) is None # type: ignore
+        assert env.set_state(1) is None  # type: ignore
     def test_get_observation_from_history(self) -> None:
         """
@@ -301,7 +306,10 @@ class TestStoppingGamePomdpDefenderEnvSuite:
         t = 0
         observation = 0
         expected_actions = [0, 1]
-        assert env.get_actions_from_particles(particles, t, observation) == expected_actions
+        assert (
+            env.get_actions_from_particles(particles, t, observation)
+            == expected_actions
+        )
     def test_step(self) -> None:
         """
@@ -315,8 +323,12 @@ class TestStoppingGamePomdpDefenderEnvSuite:
         attacker_stage_strategy[1][0] = 0.9
         attacker_stage_strategy[1][1] = 0.1
         attacker_stage_strategy[2] = attacker_stage_strategy[1]
-        attacker_strategy = RandomPolicy(actions=list(self.config.A2), player_type=PlayerType.ATTACKER,
-                                         stage_policy_tensor=list(attacker_stage_strategy))
+        attacker_actions = list(map(lambda x: Action(id=x, descr=""), self.config.A2))
+        attacker_strategy = RandomPolicy(
+            actions=attacker_actions,
+            player_type=PlayerType.ATTACKER,
+            stage_policy_tensor=list(attacker_stage_strategy),
+        )
         defender_pomdp_config = StoppingGameDefenderPomdpConfig(
             env_name="test_env",
             stopping_game_config=self.config,
@@ -328,9 +340,9 @@ class TestStoppingGamePomdpDefenderEnvSuite:
         env.reset()
         defender_obs, reward, terminated, truncated, info = env.step(a1)
         assert len(defender_obs) == 2
-        assert isinstance(defender_obs[0], float) # type: ignore
-        assert isinstance(defender_obs[1], float) # type: ignore
-        assert isinstance(reward, float) # type: ignore
-        assert isinstance(terminated, bool) # type: ignore
-        assert isinstance(truncated, bool) # type: ignore
-        assert isinstance(info, dict) # type: ignore
+        assert isinstance(defender_obs[0], float)  # type: ignore
+        assert isinstance(defender_obs[1], float)  # type: ignore
+        assert isinstance(reward, float)  # type: ignore
+        assert isinstance(terminated, bool)  # type: ignore
+        assert isinstance(truncated, bool)  # type: ignore
+        assert isinstance(info, dict)  # type: ignore