gym-csle-stopping-game: 0.2.19-py3-none-any.whl → 0.2.20-py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of gym-csle-stopping-game might be problematic.

gym_csle_stopping_game/__version__.py
@@ -1 +1 @@
-__version__ = '0.2.19'
+__version__ = '0.2.20'

gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py
@@ -37,7 +37,6 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
         :param d: the dict to convert
         :return: the created instance
         """
-        defender_strategy = None
         try:
             defender_strategy = MultiThresholdStoppingPolicy.from_dict(d["defender_strategy"])
         except Exception:
@@ -58,7 +57,7 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):

         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["stopping_game_config"] = self.stopping_game_config.to_dict()
         d["defender_strategy"] = self.defender_strategy.to_dict()
         d["stopping_game_name"] = self.stopping_game_name

gym_csle_stopping_game/dao/stopping_game_config.py
@@ -1,6 +1,7 @@
 from typing import Dict, Any
 import gymnasium as gym
 import numpy as np
+import numpy.typing as npt
 from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig


@@ -10,8 +11,10 @@ class StoppingGameConfig(SimulationEnvInputConfig):
     """

     def __init__(self, env_name: str,
-                 T: np.ndarray, O: np.ndarray, Z: np.ndarray, R: np.ndarray, S: np.ndarray, A1: np.ndarray,
-                 A2: np.ndarray, L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int, b1: np.ndarray,
+                 T: npt.NDArray[Any], O: npt.NDArray[np.int_], Z: npt.NDArray[Any],
+                 R: npt.NDArray[Any], S: npt.NDArray[np.int_], A1: npt.NDArray[np.int_],
+                 A2: npt.NDArray[np.int_], L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int,
+                 b1: npt.NDArray[np.float_],
                  save_dir: str, checkpoint_traces_freq: int, gamma: float = 1) -> None:
         """
         Initializes the DTO
@@ -59,7 +62,7 @@ class StoppingGameConfig(SimulationEnvInputConfig):

         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["T"] = list(self.T.tolist())
         d["O"] = list(self.O.tolist())
         d["Z"] = list(self.Z.tolist())

gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py
@@ -38,7 +38,6 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
         :param d: the dict to convert
         :return: the created instance
         """
-        attacker_strategy = None
         try:
             attacker_strategy = MultiThresholdStoppingPolicy.from_dict(d["attacker_strategy"])
         except Exception:
@@ -63,7 +62,7 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):

         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["stopping_game_config"] = self.stopping_game_config.to_dict()
         d["attacker_strategy"] = self.attacker_strategy.to_dict()
         d["stopping_game_name"] = self.stopping_game_name

gym_csle_stopping_game/dao/stopping_game_state.py
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 import numpy as np
+import numpy.typing as npt
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from csle_base.json_serializable import JSONSerializable


@@ -9,7 +10,7 @@ class StoppingGameState(JSONSerializable):
     Represents the state of the optimal stopping game
     """

-    def __init__(self, b1: np.ndarray, L: int) -> None:
+    def __init__(self, b1: npt.NDArray[np.float_], L: int) -> None:
         """
         Intializes the state

@@ -34,13 +35,13 @@ class StoppingGameState(JSONSerializable):
         self.s = StoppingGameUtil.sample_initial_state(b1=self.b1)
         self.b = self.b1.copy()

-    def attacker_observation(self) -> np.ndarray:
+    def attacker_observation(self) -> npt.NDArray[Any]:
         """
         :return: the attacker's observation
         """
         return np.array([self.l, self.b[1], self.s])

-    def defender_observation(self) -> np.ndarray:
+    def defender_observation(self) -> npt.NDArray[Any]:
         """
         :return: the defender's observation
         """
@@ -73,7 +74,7 @@ class StoppingGameState(JSONSerializable):

         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["L"] = self.L
         d["b1"] = list(self.b1)
         d["b"] = list(self.b)

gym_csle_stopping_game/envs/stopping_game_env.py
@@ -1,5 +1,6 @@
-from typing import Tuple, Dict, Union, List, Any
+from typing import Tuple, Dict, List, Any
 import numpy as np
+import numpy.typing as npt
 import time
 import math
 import csle_common.constants.constants as constants
@@ -59,15 +60,15 @@ class StoppingGameEnv(BaseEnv):
         }

         # Setup traces
-        self.traces = []
+        self.traces: List[SimulationTrace] = []
         self.trace = SimulationTrace(simulation_env=self.config.env_name)

         # Reset
         self.reset()
         super().__init__()

-    def step(self, action_profile: Tuple[int, Tuple[np.ndarray, int]]) \
-            -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, bool, dict]:
+    def step(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]]) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

@@ -81,7 +82,7 @@ class StoppingGameEnv(BaseEnv):
         assert pi2.shape[0] == len(self.config.S)
         assert pi2.shape[1] == len(self.config.A1)
         done = False
-        info = {}
+        info: Dict[str, Any] = {}

         # Compute r, s', b',o'
         r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -134,8 +135,8 @@ class StoppingGameEnv(BaseEnv):

         return (defender_obs, attacker_obs), (r, -r), done, done, info

-    def step_test(self, action_profile: Tuple[int, Tuple[np.ndarray, int]], sample_Z) \
-            -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, dict]:
+    def step_test(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]], sample_Z) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

@@ -149,7 +150,7 @@ class StoppingGameEnv(BaseEnv):
         assert pi2.shape[0] == len(self.config.S)
         assert pi2.shape[1] == len(self.config.A1)
         done = False
-        info = {}
+        info: Dict[str, Any] = {}

         # Compute r, s', b',o'
         r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -201,8 +202,8 @@ class StoppingGameEnv(BaseEnv):

         return (defender_obs, attacker_obs), (r, -r), done, info

-    def step_trace(self, trace: EmulationTrace, a1: int, pi2: np.ndarray) \
-            -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, dict]:
+    def step_trace(self, trace: EmulationTrace, a1: int, pi2: npt.NDArray[Any]) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
         """
         Utility function for stepping a given trace

@@ -212,7 +213,7 @@ class StoppingGameEnv(BaseEnv):
         :return: the result of the step
         """
         done = False
-        info = {}
+        info: Dict[str, Any] = {}
         if (self.state.t - 1) < len(trace.attacker_actions):
             a2_emulation_action = trace.attacker_actions[self.state.t - 1]
             a2 = 0
@@ -300,7 +301,7 @@ class StoppingGameEnv(BaseEnv):
         else:
             return 1 - (min(10, (first_stop - (intrusion_start + 1))) / 2) / 10

-    def _info(self, info) -> Dict[str, Union[float, int]]:
+    def _info(self, info: Dict[str, Any]) -> Dict[str, Any]:
         """
         Adds the cumulative reward and episode length to the info dict

@@ -363,7 +364,8 @@ class StoppingGameEnv(BaseEnv):
             defender_baseline_stop_on_first_alert_return
         return info

-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[Tuple[np.ndarray, np.ndarray], Dict[str, Any]]:
+    def reset(self, seed: int = 0, soft: bool = False) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>

@@ -378,7 +380,7 @@ class StoppingGameEnv(BaseEnv):
         defender_obs = self.state.defender_observation()
         self.trace.attacker_observations.append(attacker_obs)
         self.trace.defender_observations.append(defender_obs)
-        info = {}
+        info: Dict[str, Any] = {}
         return (defender_obs, attacker_obs), info

     @staticmethod
@@ -408,7 +410,7 @@ class StoppingGameEnv(BaseEnv):
         done = False
         defender_obs_space = simulation_env_config.joint_observation_space_config.observation_spaces[0]
         b = env.state.b1
-        o = env.reset()
+        o, _ = env.reset()
         (d_obs, a_obs) = o
         t = 0
         s.reset()
@@ -419,7 +421,7 @@ class StoppingGameEnv(BaseEnv):
         while not done:
             a1 = defender_policy.action(d_obs)
             a2 = attacker_policy.action(a_obs)
-            o, r, done, info = env.step((a1, a2))
+            o, r, done, info, _ = env.step((a1, a2))
             (d_obs, a_obs) = o
             r_1, r_2 = r
             logger.debug(f"a1:{a1}, a2:{a2}, d_obs:{d_obs}, a_obs:{a_obs}, r:{r}, done:{done}, info: {info}")
@@ -448,12 +450,12 @@ class StoppingGameEnv(BaseEnv):
                          f"{defender_obs_space.observation_id_to_observation_vector_inv}")
             logger.debug(f"observation_id_to_observation_vector_inv:"
                          f"{o_components_str in defender_obs_space.observation_id_to_observation_vector_inv}")
+            emulation_o = 0
             if o_components_str in defender_obs_space.observation_id_to_observation_vector_inv:
-                o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
-            else:
-                o = 0
-            logger.debug(f"o:{o}")
-            b = StoppingGameUtil.next_belief(o=o, a1=a1, b=b, pi2=a2, config=env.config, l=env.state.l, a2=a2)
+                emulation_o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
+            logger.debug(f"o:{emulation_o}")
+            b = StoppingGameUtil.next_belief(o=emulation_o, a1=a1, b=b, pi2=a2, config=env.config,
+                                             l=env.state.l, a2=a2)
             d_obs[1] = b[1]
             a_obs[1] = b[1]
             logger.debug(f"b:{b}")
@@ -464,7 +466,7 @@ class StoppingGameEnv(BaseEnv):
             simulation_trace.infos.append(info)
             simulation_trace.states.append(s)
             simulation_trace.beliefs.append(b[1])
-            simulation_trace.infrastructure_metrics.append(o)
+            simulation_trace.infrastructure_metrics.append(emulation_o)

         em_sim_trace = EmulationSimulationTrace(emulation_trace=emulation_trace, simulation_trace=simulation_trace)
         MetastoreFacade.save_emulation_simulation_trace(em_sim_trace)
@@ -556,10 +558,10 @@ class StoppingGameEnv(BaseEnv):
         stage_policy = []
         for s in self.config.S:
             if s != 2:
-                dist = [0, 0]
-                dist[a2] = 1
+                dist = [0.0, 0.0]
+                dist[a2] = 1.0
                 stage_policy.append(dist)
             else:
                 stage_policy.append([0.5, 0.5])
-        stage_policy = np.array(stage_policy)
-        _, _, done, _ = self.step(action_profile=(a1, (stage_policy, a2)))
+        pi2 = np.array(stage_policy)
+        _, _, done, _, _ = self.step(action_profile=(a1, (pi2, a2)))
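
Note: the call-site changes above (`o, _ = env.reset()` and the five-value unpacking of `step(...)`) follow the Gymnasium API, where `reset()` returns `(obs, info)` and `step()` returns `(obs, reward, terminated, truncated, info)`. An illustrative loop against any registered env (not package code):

    import gymnasium as gym

    env = gym.make("CartPole-v1")  # any registered env; used only for illustration
    obs, info = env.reset(seed=0)  # two values, matching `o, _ = env.reset()`
    terminated = truncated = False
    while not (terminated or truncated):
        action = env.action_space.sample()
        # five values, matching `_, _, done, _, _ = self.step(...)` above
        obs, reward, terminated, truncated, info = env.step(action)
    env.close()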

gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py
@@ -1,6 +1,6 @@
 from typing import Tuple, List, Union, Dict, Any
-import gymnasium as gym
 import numpy as np
+import numpy.typing as npt
 import torch
 import math
 from csle_common.dao.simulation_config.base_env import BaseEnv
@@ -9,6 +9,7 @@ from gym_csle_stopping_game.dao.stopping_game_attacker_mdp_config import Stoppin
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 import gym_csle_stopping_game.constants.constants as env_constants
+from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv


 class StoppingGameMdpAttackerEnv(BaseEnv):
@@ -23,7 +24,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         :param config: the configuration of the environment
         """
         self.config = config
-        self.stopping_game_env = gym.make(self.config.stopping_game_name, config=self.config.stopping_game_config)
+        self.stopping_game_env: StoppingGameEnv = StoppingGameEnv(config=self.config.stopping_game_config)

         # Setup spaces
         self.observation_space = self.config.stopping_game_config.attacker_observation_space()
@@ -33,41 +34,47 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.static_defender_strategy = self.config.defender_strategy

         # Setup Config
-        self.viewer = None
+        self.viewer: Union[None, Any] = None
         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': 50  # Video rendering speed
         }

-        self.latest_defender_obs = None
-        self.latest_attacker_obs = None
-        self.model = None
+        self.latest_defender_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+        self.latest_attacker_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+        self.model: Union[None, Any] = None

         # Reset
         self.reset()
         super().__init__()

-    def step(self, pi2: Union[List[List[float]], int, float, np.int64, float, np.float64]) \
-            -> Tuple[np.ndarray, int, bool, bool, dict]:
+    def step(self, pi2: Union[npt.NDArray[Any], int, float, np.int_, np.float_]) \
+            -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

         :param pi2: attacker stage policy
         :return: (obs, reward, terminated, truncated, info)
         """
-        if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is float \
-                or type(pi2) is np.float64:
+        if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is np.float64:
             a2 = pi2
-            pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs, a2=a2)
+            if self.latest_attacker_obs is None:
+                raise ValueError("Attacker observation is None")
+            pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs), a2=int(a2))
         else:
             if self.model is not None:
-                pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs)
+                if self.latest_attacker_obs is None:
+                    raise ValueError("Attacker observation is None")
+                pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
                 a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
             else:
                 pi2 = np.array(pi2)
-                if (not pi2.shape[0] == len(self.config.stopping_game_config.S)
-                        or pi2.shape[1] != len(self.config.stopping_game_config.A1)) and self.model is not None:
-                    pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs)
+                try:
+                    if self.latest_attacker_obs is None:
+                        raise ValueError("Attacker observation is None")
+                    pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
+                except Exception:
+                    pass
                 a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)

         # a2 = pi2
@@ -83,7 +90,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
             a1 = self.static_defender_strategy.action(o=self.latest_defender_obs)

         # Step the game
-        o, r, d, _, info = self.stopping_game_env.step((a1, (pi2, a2)))
+        o, r, d, _, info = self.stopping_game_env.step((int(a1), (pi2, int(a2))))
         self.latest_defender_obs = o[0]
         self.latest_attacker_obs = o[1]
         attacker_obs = o[1]
@@ -94,7 +101,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):

         return attacker_obs, r[1], d, d, info

-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[np.ndarray, Dict[str, Any]]:
+    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>

@@ -104,7 +111,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.latest_defender_obs = o[0]
         self.latest_attacker_obs = o[1]
         attacker_obs = o[1]
-        info = {}
+        info: Dict[str, Any] = {}
         return attacker_obs, info

     def set_model(self, model) -> None:
@@ -116,7 +123,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         """
         self.model = model

-    def calculate_stage_policy(self, o: List, a2: int = 0) -> np.ndarray:
+    def calculate_stage_policy(self, o: List[Any], a2: int = 0) -> npt.NDArray[Any]:
         """
         Calculates the stage policy of a given model and observation

@@ -127,15 +134,14 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
             stage_policy = []
             for s in self.config.stopping_game_config.S:
                 if s != 2:
-                    dist = [0, 0]
-                    dist[a2] = 1
+                    dist = [0.0, 0.0]
+                    dist[a2] = 1.0
                     stage_policy.append(dist)
                 else:
                     stage_policy.append([0.5, 0.5])
             return np.array(stage_policy)
         if isinstance(self.model, MixedMultiThresholdStoppingPolicy):
-            stage_policy = np.array(self.model.stage_policy(o=o))
-            return stage_policy
+            return np.array(self.model.stage_policy(o=o))
         else:
             b1 = o[1]
             l = int(o[0])
@@ -146,18 +152,19 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
                     stage_policy.append(self._get_attacker_dist(obs=o))
                 else:
                     stage_policy.append([0.5, 0.5])
-            stage_policy = np.array(stage_policy)
-            return stage_policy
+            return np.array(stage_policy)

-    def _get_attacker_dist(self, obs: List) -> List:
+    def _get_attacker_dist(self, obs: List[Any]) -> List[float]:
         """
         Utility function for getting the attacker's action distribution based on a given observation

         :param obs: the given observation
         :return: the action distribution
         """
-        obs = np.array([obs])
-        actions, values, log_prob = self.model.policy.forward(obs=torch.tensor(obs).to(self.model.device))
+        np_obs = np.array([obs])
+        if self.model is None:
+            raise ValueError("Model is None")
+        actions, values, log_prob = self.model.policy.forward(obs=torch.tensor(np_obs).to(self.model.device))
         action = actions[0]
         if action == 1:
             stop_prob = math.exp(log_prob)
@@ -211,7 +218,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         Closes the viewer (cleanup)
         :return: None
         """
-        if self.viewer:
+        if self.viewer is not None:
             self.viewer.close()
             self.viewer = None

@@ -244,4 +251,4 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
             self.reset()
         else:
             action_idx = int(raw_input)
-            _, _, done, _ = self.step(pi2=action_idx)
+            _, _, done, _, _ = self.step(pi2=action_idx)
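
Note: replacing `gym.make(...)` with direct construction of `StoppingGameEnv` gives the attacker env a concretely typed handle, so attributes such as `self.stopping_game_env.state` are reachable without unwrapping. With `gym.make`, Gymnasium returns the env wrapped (e.g. in `OrderEnforcing`), as this standalone sketch shows:

    import gymnasium as gym

    wrapped = gym.make("CartPole-v1")
    print(type(wrapped))            # a Gymnasium wrapper, not the concrete class
    print(type(wrapped.unwrapped))  # the underlying CartPoleEnv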

gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py
@@ -1,6 +1,6 @@
-from typing import Tuple, List, Dict, Any
-import gymnasium as gym
+from typing import Tuple, List, Dict, Any, Union
 import numpy as np
+import numpy.typing as npt
 from csle_common.dao.simulation_config.base_env import BaseEnv
 from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
@@ -27,7 +27,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         :param attacker_strategy: the strategy of the static attacker
         """
         self.config = config
-        self.stopping_game_env = gym.make(self.config.stopping_game_name, config=self.config.stopping_game_config)
+        self.stopping_game_env = StoppingGameEnv(config=self.config.stopping_game_config)

         # Setup spaces
         self.observation_space = self.config.stopping_game_config.defender_observation_space()
@@ -37,18 +37,18 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         self.static_attacker_strategy = self.config.attacker_strategy

         # Setup Config
-        self.viewer = None
+        self.viewer: Union[None, Any] = None
         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': 50  # Video rendering speed
         }

-        self.latest_attacker_obs = None
+        self.latest_attacker_obs: Union[None, npt.NDArray[Any]] = None
         # Reset
         self.reset()
         super().__init__()

-    def step(self, a1: int) -> Tuple[np.ndarray, int, bool, bool, dict]:
+    def step(self, a1: int) -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

@@ -66,7 +66,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):

         return defender_obs, r[0], d, d, info

-    def step_test(self, a1: int, sample_Z) -> Tuple[np.ndarray, int, bool, dict]:
+    def step_test(self, a1: int, sample_Z) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

@@ -84,7 +84,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):

         return defender_obs, r[0], d, info

-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[np.ndarray, Dict[str, Any]]:
+    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>

@@ -93,7 +93,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         o, _ = self.stopping_game_env.reset()
         self.latest_attacker_obs = o[1]
         defender_obs = o[0]
-        dict = {}
+        dict: Dict[str, Any] = {}
         return defender_obs, dict

     def render(self, mode: str = 'human'):
@@ -105,7 +105,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         """
         raise NotImplementedError("Rendering is not implemented for this environment")

-    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[np.ndarray, int, bool, dict]:
+    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
         """
         Utility method for stopping a pre-recorded trace

@@ -178,7 +178,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         Closes the viewer (cleanup)
         :return: None
         """
-        if self.viewer:
+        if self.viewer is not None:
             self.viewer.close()
             self.viewer = None

@@ -211,4 +211,4 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
             self.reset()
         else:
             action_idx = int(raw_input)
-            _, _, done, _ = self.step(pi2=action_idx)
+            _, _, done, _, _ = self.step(a1=action_idx)
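
Note: besides adopting the five-value step API, the last hunk fixes a latent bug: 0.2.19 called the defender env's `step` with the keyword `pi2`, but its signature only has `a1`, so manual play would raise a `TypeError`. A standalone illustration (not package code):

    def step(a1: int):
        # stand-in for StoppingGamePomdpDefenderEnv.step
        return a1, a1

    try:
        step(pi2=1)  # 0.2.19 call style
    except TypeError as e:
        print(e)     # step() got an unexpected keyword argument 'pi2'

    print(step(a1=1))  # 0.2.20 call style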

gym_csle_stopping_game/util/stopping_game_util.py
@@ -1,5 +1,6 @@
-from typing import List, Dict, Tuple
+from typing import List, Dict, Tuple, Any
 import numpy as np
+import numpy.typing as npt
 from scipy.stats import betabinom
 from csle_common.dao.system_identification.emulation_statistics import EmulationStatistics
 from csle_common.dao.simulation_config.observation_space_config import ObservationSpaceConfig
@@ -14,7 +15,7 @@ class StoppingGameUtil:
     """

     @staticmethod
-    def b1() -> np.ndarray:
+    def b1() -> npt.NDArray[np.int_]:
         """
         Gets the initial belief

@@ -32,7 +33,7 @@ class StoppingGameUtil:
         return np.array([0, 1, 2])

     @staticmethod
-    def defender_actions() -> np.ndarray:
+    def defender_actions() -> npt.NDArray[np.int_]:
         """
         Gets the action space of the defender

@@ -41,7 +42,7 @@ class StoppingGameUtil:
         return np.array([0, 1])

     @staticmethod
-    def attacker_actions() -> np.ndarray:
+    def attacker_actions() -> npt.NDArray[np.int_]:
         """
         Gets the action space of the attacker

@@ -60,7 +61,7 @@ class StoppingGameUtil:
         return np.array(list(range(n + 1)))

     @staticmethod
-    def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> np.ndarray:
+    def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> npt.NDArray[Any]:
         """
         Gets the reward tensor

@@ -89,11 +90,10 @@ class StoppingGameUtil:
                ]
            ]
            R_l.append(R)
-        R = np.array(R_l)
-        return R
+        return np.array(R_l)

     @staticmethod
-    def transition_tensor(L: int, p: float) -> np.ndarray:
+    def transition_tensor(L: int, p: float) -> npt.NDArray[Any]:
         """
         Gets the transition tensor

@@ -171,15 +171,14 @@ class StoppingGameUtil:
                ]
            ]
            T_l.append(T)
-        T = np.array(T_l)
-        return T
+        return np.array(T_l)

     @staticmethod
     def observation_tensor_from_emulation_statistics(emulation_statistic: EmulationStatistics,
                                                      observation_space_defender: ObservationSpaceConfig,
                                                      joint_action_space: JointActionSpaceConfig,
                                                      state_space: StateSpaceConfig) \
-            -> Tuple[np.ndarray, Dict[str, List]]:
+            -> Tuple[npt.NDArray[Any], Dict[str, List[Any]]]:
         """
         Returns an observation tensor based on measured emulation statistics

@@ -189,9 +188,9 @@ class StoppingGameUtil:
         :param state_space: the state space
         :return: a |A1|x|A2|x|S|x|O| tensor
         """
-        intrusion_severe_alerts_probabilities = []
-        intrusion_warning_alerts_probabilities = []
-        intrusion_login_attempts_probabilities = []
+        intrusion_severe_alerts_probabilities: List[float] = []
+        intrusion_warning_alerts_probabilities: List[float] = []
+        intrusion_login_attempts_probabilities: List[float] = []
         norm = sum(emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"].values())
         for severe_alert_obs in observation_space_defender.component_observations["severe_alerts"]:
             count = emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"][severe_alert_obs.id]
@@ -229,14 +228,14 @@ class StoppingGameUtil:
            login_attempts_a1_a2_s_o_dist = []
            for a2 in range(len(joint_action_space.action_spaces[1].actions)):
                a2_s_o_dist = []
-               severe_alerts_a2_s_o_dist = []
-               warning_alerts_a2_s_o_dist = []
-               login_attempts_a2_s_o_dist = []
+               severe_alerts_a2_s_o_dist: List[List[float]] = []
+               warning_alerts_a2_s_o_dist: List[List[float]] = []
+               login_attempts_a2_s_o_dist: List[List[float]] = []
                for s in range(len(state_space.states)):
                    s_o_dist = []
-                   severe_alerts_s_o_dist = []
-                   warning_alerts_s_o_dist = []
-                   login_attempts_s_o_dist = []
+                   severe_alerts_s_o_dist: List[float] = []
+                   warning_alerts_s_o_dist: List[float] = []
+                   login_attempts_s_o_dist: List[float] = []
                    for o in range(len(observation_space_defender.observations)):
                        obs_vector = observation_space_defender.observation_id_to_observation_id_vector[o]
                        if s == 0:
@@ -256,8 +255,8 @@ class StoppingGameUtil:
                        s_o_dist.append(p)
                    a2_s_o_dist.append(s_o_dist)
                    severe_alerts_a2_s_o_dist.append(severe_alerts_s_o_dist)
-                   warning_alerts_a2_s_o_dist.append(warning_alerts_a2_s_o_dist)
-                   login_attempts_a2_s_o_dist.append(login_attempts_a2_s_o_dist)
+                   warning_alerts_a2_s_o_dist.append(warning_alerts_s_o_dist)
+                   login_attempts_a2_s_o_dist.append(login_attempts_s_o_dist)
                a1_a2_s_o_dist.append(a2_s_o_dist)
                severe_alerts_a1_a2_s_o_dist.append(severe_alerts_a2_s_o_dist)
                warning_alerts_a1_a2_s_o_dist.append(warning_alerts_a2_s_o_dist)
@@ -316,7 +315,7 @@ class StoppingGameUtil:
         return Z

     @staticmethod
-    def sample_next_state(T: np.ndarray, l: int, s: int, a1: int, a2: int, S: np.ndarray) -> int:
+    def sample_next_state(T: npt.NDArray[Any], l: int, s: int, a1: int, a2: int, S: npt.NDArray[np.int_]) -> int:
         """
         Samples the next state

@@ -331,22 +330,20 @@ class StoppingGameUtil:
         state_probs = []
         for s_prime in S:
             state_probs.append(T[l - 1][a1][a2][s][s_prime])
-        s_prime = np.random.choice(np.arange(0, len(S)), p=state_probs)
-        return s_prime
+        return int(np.random.choice(np.arange(0, len(S)), p=state_probs))

     @staticmethod
-    def sample_initial_state(b1: np.ndarray) -> int:
+    def sample_initial_state(b1: npt.NDArray[np.float_]) -> int:
         """
         Samples the initial state

         :param b1: the initial belief
         :return: s1
         """
-        s1 = np.random.choice(np.arange(0, len(b1)), p=b1)
-        return s1
+        return int(np.random.choice(np.arange(0, len(b1)), p=b1))

     @staticmethod
-    def sample_next_observation(Z: np.ndarray, s_prime: int, O: np.ndarray) -> int:
+    def sample_next_observation(Z: npt.NDArray[Any], s_prime: int, O: npt.NDArray[np.int_]) -> int:
         """
         Samples the next observation

@@ -366,7 +363,7 @@ class StoppingGameUtil:
         return int(o)

     @staticmethod
-    def bayes_filter(s_prime: int, o: int, a1: int, b: np.ndarray, pi2: np.ndarray, l: int,
+    def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any], l: int,
                      config: StoppingGameConfig) -> float:
         """
         A Bayesian filter to compute the belief of player 1
@@ -404,7 +401,7 @@ class StoppingGameUtil:
         return b_prime_s_prime

     @staticmethod
-    def p_o_given_b_a1_a2(o: int, b: List, a1: int, a2: int, config: StoppingGameConfig) -> float:
+    def p_o_given_b_a1_a2(o: int, b: List[float], a1: int, a2: int, config: StoppingGameConfig) -> float:
         """
         Computes P[o|a,b]

@@ -423,8 +420,8 @@ class StoppingGameUtil:
         return prob

     @staticmethod
-    def next_belief(o: int, a1: int, b: np.ndarray, pi2: np.ndarray, config: StoppingGameConfig, l: int,
-                    a2: int = 0, s: int = 0) -> np.ndarray:
+    def next_belief(o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any],
+                    config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float_]:
         """
         Computes the next belief using a Bayesian filter

@@ -449,7 +446,7 @@ class StoppingGameUtil:
         return b_prime

     @staticmethod
-    def sample_attacker_action(pi2: np.ndarray, s: int) -> int:
+    def sample_attacker_action(pi2: npt.NDArray[Any], s: int) -> int:
         """
         Samples the attacker action

@@ -457,5 +454,4 @@ class StoppingGameUtil:
         :param s: the game state
         :return: a2 (the attacker action
         """
-        a2 = np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s])
-        return a2
+        return int(np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s]))
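
Note: the hunk at `@@ -256,8 +255,8 @@` above also fixes a real bug, not just typing: 0.2.19 appended `warning_alerts_a2_s_o_dist` and `login_attempts_a2_s_o_dist` to themselves instead of nesting the per-state lists, producing self-referential lists rather than the intended |S| x |O| structure. A standalone illustration:

    buggy: list = []
    buggy.append(buggy)        # 0.2.19 behaviour: the list now contains itself
    print(buggy[0] is buggy)   # True -- self-referential, not nested data

    inner = [0.1, 0.9]         # a per-state observation distribution
    fixed: list = []
    fixed.append(inner)        # 0.2.20 behaviour: the inner row is nested
    print(fixed)               # [[0.1, 0.9]]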

gym_csle_stopping_game-0.2.20.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-csle-stopping-game
-Version: 0.2.19
+Version: 0.2.20
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com
@@ -15,10 +15,10 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Intended Audience :: Science/Research
 Requires-Python: >=3.8
 Requires-Dist: gymnasium >=0.27.1
-Requires-Dist: csle-base >=0.2.19
-Requires-Dist: csle-common >=0.2.19
-Requires-Dist: csle-attacker >=0.2.19
-Requires-Dist: csle-defender >=0.2.19
+Requires-Dist: csle-base >=0.2.20
+Requires-Dist: csle-common >=0.2.20
+Requires-Dist: csle-attacker >=0.2.20
+Requires-Dist: csle-defender >=0.2.20
 Requires-Dist: csle-collector >=0.2.9
 Provides-Extra: testing
 Requires-Dist: pytest >=6.0 ; extra == 'testing'
@@ -26,6 +26,9 @@ Requires-Dist: pytest-cov >=2.0 ; extra == 'testing'
 Requires-Dist: pytest-mock >=3.6.0 ; extra == 'testing'
 Requires-Dist: pytest-grpc >=0.8.0 ; extra == 'testing'
 Requires-Dist: mypy >=1.4.1 ; extra == 'testing'
+Requires-Dist: mypy-extensions >=1.0.0 ; extra == 'testing'
+Requires-Dist: mypy-protobuf >=3.5.0 ; extra == 'testing'
+Requires-Dist: types-PyYAML >=6.0.12.11 ; extra == 'testing'
 Requires-Dist: types-paramiko >=3.2.0.0 ; extra == 'testing'
 Requires-Dist: types-protobuf >=4.23.0.3 ; extra == 'testing'
 Requires-Dist: types-requests >=2.31.0.1 ; extra == 'testing'

gym_csle_stopping_game-0.2.20.dist-info/RECORD
@@ -0,0 +1,19 @@
+gym_csle_stopping_game/__init__.py,sha256=ooy6TjxvBi1sZMEX3_mVlvfskqI5GqwITWzI882tfk0,657
+gym_csle_stopping_game/__version__.py,sha256=qMaQjrsi_F2wUsxjrQ7pmHBHD2rS1ibAIXrI1iCRn08,23
+gym_csle_stopping_game/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gym_csle_stopping_game/constants/constants.py,sha256=eIoD9eXifZ73kP-lSlvG-IXCpe4n6D-_aDygx0zOr5U,1030
+gym_csle_stopping_game/dao/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py,sha256=kmtrVa2CCVbi5xfd6uPWqMvhGmP8ccrtn1e_VmVvH7k,3494
+gym_csle_stopping_game/dao/stopping_game_config.py,sha256=5jFMvSWkJ0_PqlVZlAf2pzJFttHeUdUv_G4GeXIrdm0,5595
+gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py,sha256=3FfNi2-R6n1LqjA644EVq-v7wtp6sqyEkEdBN90-2n0,3753
+gym_csle_stopping_game/dao/stopping_game_state.py,sha256=Fyl19vIMeShYnHaixeJ-OHM4LHpJAswhBIp_5ytN3bM,2768
+gym_csle_stopping_game/envs/__init__.py,sha256=SQHaqXI0_2HYsC8i9swXEHDFcXKEYpb8GRP9l_S0Sw8,74
+gym_csle_stopping_game/envs/stopping_game_env.py,sha256=SY3vYUhtS42XIkzJfIhiJE-dKjSTRH7iiLpEQ7Id9P8,26178
+gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py,sha256=JGKp2B3s7Hf6z_nfOmDtv9LZo26i9HayTcVBq6gW6sk,9570
+gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py,sha256=JD7hGrBMYrNXh6A3JXFLvz4Op1ZWOZqlFEcDU4jS49k,8754
+gym_csle_stopping_game/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gym_csle_stopping_game/util/stopping_game_util.py,sha256=Kgt6o8nobpdlx0zoPxfoA8sH24KxMxQ5gh4Txt9wWgs,19160
+gym_csle_stopping_game-0.2.20.dist-info/METADATA,sha256=Ez_SGYA2X1QFWfE4arGr2LLHKQIliBQJ8RIdP804FyU,2055
+gym_csle_stopping_game-0.2.20.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
+gym_csle_stopping_game-0.2.20.dist-info/top_level.txt,sha256=3DBHkAEI00nq0aXZlJUkXJrLiwkcJCfaFoYcaOzEZUU,23
+gym_csle_stopping_game-0.2.20.dist-info/RECORD,,

gym_csle_stopping_game-0.2.19.dist-info/RECORD
@@ -1,19 +0,0 @@
-gym_csle_stopping_game/__init__.py,sha256=ooy6TjxvBi1sZMEX3_mVlvfskqI5GqwITWzI882tfk0,657
-gym_csle_stopping_game/__version__.py,sha256=8swO86HIyP6FWtTckB79yaNlkfWbV8msnhazFt9EOKQ,23
-gym_csle_stopping_game/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gym_csle_stopping_game/constants/constants.py,sha256=eIoD9eXifZ73kP-lSlvG-IXCpe4n6D-_aDygx0zOr5U,1030
-gym_csle_stopping_game/dao/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py,sha256=e8KwNZpZ_VswM57ZmjaGDeFTFButVCB8WUAHEctAWJk,3511
-gym_csle_stopping_game/dao/stopping_game_config.py,sha256=qWcfGLf7X6ymp8R_SLkXxNgH1AngIsGrhFXsB5ZnBxw,5448
-gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py,sha256=niK4cK_YoyFW6Wq-rFK_5hbLsDtlQ-UivcVf5BtGUrA,3770
-gym_csle_stopping_game/dao/stopping_game_state.py,sha256=tqlFJ9sjlNzkgKJPBbLKhi_HFEnuTJfFcZzs-idrf4w,2701
-gym_csle_stopping_game/envs/__init__.py,sha256=SQHaqXI0_2HYsC8i9swXEHDFcXKEYpb8GRP9l_S0Sw8,74
-gym_csle_stopping_game/envs/stopping_game_env.py,sha256=J0fL4z6cNhmXiwSXtaU9wbrAvVBeDuW2bJQ0YWAqGMs,25889
-gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py,sha256=wsOZRDgktz5ENvmhQI8DLCLIoN2JhmY4eoLyX0X0zsA,9060
-gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py,sha256=IvY4gwg3Mz2hNBTuTToCbPLA8Zp1KRWc_P9H5KXVvug,8636
-gym_csle_stopping_game/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gym_csle_stopping_game/util/stopping_game_util.py,sha256=RaR-onJtmuTk15B2m4KUYTbXNElKBw2iEkoP6m0n1b0,18912
-gym_csle_stopping_game-0.2.19.dist-info/METADATA,sha256=NJe-ly9n_tOtOTMS0AHj7OwBAsGuVL2Qxu5bAl08wUA,1876
-gym_csle_stopping_game-0.2.19.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
-gym_csle_stopping_game-0.2.19.dist-info/top_level.txt,sha256=3DBHkAEI00nq0aXZlJUkXJrLiwkcJCfaFoYcaOzEZUU,23
-gym_csle_stopping_game-0.2.19.dist-info/RECORD,,