gym-csle-stopping-game 0.2.18.tar.gz → 0.2.20.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gym-csle-stopping-game might be problematic.
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/PKG-INFO +1 -4
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/pyproject.toml +1 -1
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/setup.cfg +18 -6
- gym_csle_stopping_game-0.2.20/src/gym_csle_stopping_game/__version__.py +1 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py +3 -2
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_config.py +8 -3
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py +3 -2
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_state.py +7 -4
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_env.py +60 -29
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py +37 -30
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py +30 -12
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/util/stopping_game_util.py +48 -37
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/PKG-INFO +1 -4
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/SOURCES.txt +2 -1
- gym_csle_stopping_game-0.2.20/src/gym_csle_stopping_game.egg-info/requires.txt +28 -0
- gym_csle_stopping_game-0.2.20/tests/test_stopping_game_util.py +19 -0
- gym_csle_stopping_game-0.2.18/src/gym_csle_stopping_game/__version__.py +0 -1
- gym_csle_stopping_game-0.2.18/src/gym_csle_stopping_game.egg-info/requires.txt +0 -20
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/setup.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/constants/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/constants/constants.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/util/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/dependency_links.txt +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/not-zip-safe +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/top_level.txt +0 -0
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym_csle_stopping_game
-Version: 0.2.18
+Version: 0.2.20
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com
@@ -15,6 +15,3 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Intended Audience :: Science/Research
 Requires-Python: >=3.8
 Provides-Extra: testing
-
-UNKNOWN
-
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/setup.cfg

@@ -20,10 +20,10 @@ classifiers =
 [options]
 install_requires =
     gymnasium>=0.27.1
-    csle-base>=0.2.18
-    csle-common>=0.2.18
-    csle-attacker>=0.2.18
-    csle-defender>=0.2.18
+    csle-base>=0.2.20
+    csle-common>=0.2.20
+    csle-attacker>=0.2.20
+    csle-defender>=0.2.20
     csle-collector>=0.2.9
 python_requires = >=3.8
 package_dir =
@@ -40,8 +40,16 @@ testing =
     pytest-cov>=2.0
     pytest-mock>=3.6.0
    pytest-grpc>=0.8.0
-    mypy>=1.3.0
+    mypy>=1.4.1
+    mypy-extensions>=1.0.0
+    mypy-protobuf>=3.5.0
+    types-PyYAML>=6.0.12.11
+    types-paramiko>=3.2.0.0
+    types-protobuf>=4.23.0.3
+    types-requests>=2.31.0.1
+    types-urllib3>=1.26.25.13
     flake8>=3.9
+    flake8-rst-docstrings>=0.3.0
     tox>=3.24
     sphinx>=5.3.0
     sphinxcontrib-napoleon>=0.7
@@ -54,8 +62,12 @@ gym_csle_stopping_game = py.typed
 
 [flake8]
 max-line-length = 120
-exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,*_pb2*,*init__
+exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,*_pb2*,*init__*,.tox
 ignore = E741, W503, W504, F821, W605
+rst-roles = class, func, ref
+rst-directives = envvar, exception
+rst-substitutions = version
+extend-ignore = D401, D400, D100, RST305, RST219, D205, D202, D200, D204, RST206, W293, D403, D402, RST306
 
 [egg_info]
 tag_build =
gym_csle_stopping_game-0.2.20/src/gym_csle_stopping_game/__version__.py

@@ -0,0 +1 @@
+__version__ = '0.2.20'
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py

@@ -37,7 +37,6 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
         :param d: the dict to convert
         :return: the created instance
         """
-        defender_strategy = None
         try:
             defender_strategy = MultiThresholdStoppingPolicy.from_dict(d["defender_strategy"])
         except Exception:
@@ -54,9 +53,11 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
 
     def to_dict(self) -> Dict[str, Any]:
         """
+        Converts the object to a dict representation
+
         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["stopping_game_config"] = self.stopping_game_config.to_dict()
         d["defender_strategy"] = self.defender_strategy.to_dict()
         d["stopping_game_name"] = self.stopping_game_name
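The `d: Dict[str, Any] = {}` change above recurs throughout the DAO classes in this release; it exists purely to satisfy mypy, which cannot infer a type for an empty dict literal. A minimal standalone sketch of the pattern (illustrative only, not code from the package):

import sys
from typing import Any, Dict


def to_dict(obj_name: str, length: int) -> Dict[str, Any]:
    # Without the annotation, mypy reports: Need type annotation for "d" [var-annotated]
    d: Dict[str, Any] = {}
    d["name"] = obj_name   # str value
    d["L"] = length        # int value; Any makes mixed value types acceptable
    return d


if __name__ == "__main__":
    print(to_dict("stopping_game", 3), file=sys.stdout)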
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_config.py

@@ -1,6 +1,7 @@
 from typing import Dict, Any
 import gymnasium as gym
 import numpy as np
+import numpy.typing as npt
 from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig
 
 
@@ -10,8 +11,10 @@ class StoppingGameConfig(SimulationEnvInputConfig):
     """
 
     def __init__(self, env_name: str,
-                 T:
-
+                 T: npt.NDArray[Any], O: npt.NDArray[np.int_], Z: npt.NDArray[Any],
+                 R: npt.NDArray[Any], S: npt.NDArray[np.int_], A1: npt.NDArray[np.int_],
+                 A2: npt.NDArray[np.int_], L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int,
+                 b1: npt.NDArray[np.float_],
                  save_dir: str, checkpoint_traces_freq: int, gamma: float = 1) -> None:
         """
         Initializes the DTO
@@ -55,9 +58,11 @@ class StoppingGameConfig(SimulationEnvInputConfig):
 
     def to_dict(self) -> Dict[str, Any]:
         """
+        Converts the object to a dict representation
+
         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["T"] = list(self.T.tolist())
         d["O"] = list(self.O.tolist())
         d["Z"] = list(self.Z.tolist())
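The newly annotated constructor can be driven directly from the helper functions of StoppingGameUtil that are changed later in this diff. A minimal construction sketch follows; the reward constants, the intrusion probability p, the uniform Z tensor, the environment name and the save_dir path are made-up placeholders, not values shipped with the release:

import numpy as np
from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

L = 3                                                   # number of defender stop actions
O = StoppingGameUtil.observation_space(n=10)            # observations 0..10
S = StoppingGameUtil.state_space()                      # states 0, 1, 2
A1 = StoppingGameUtil.defender_actions()                # continue / stop
A2 = StoppingGameUtil.attacker_actions()                # continue / stop
# Placeholder observation tensor of shape |A1| x |A2| x |S| x |O| (uniform row distributions)
Z = np.ones((len(A1), len(A2), len(S), len(O))) / len(O)
config = StoppingGameConfig(
    env_name="csle-stopping-game-v1",                   # hypothetical environment name
    T=StoppingGameUtil.transition_tensor(L=L, p=0.01),
    O=O, Z=Z,
    R=StoppingGameUtil.reward_tensor(R_SLA=1, R_INT=-10, R_COST=-5, L=L, R_ST=20),
    S=S, A1=A1, A2=A2, L=L,
    R_INT=-10, R_COST=-5, R_SLA=1, R_ST=20,
    b1=StoppingGameUtil.b1(),
    save_dir="/tmp/stopping_game_traces",                # placeholder directory
    checkpoint_traces_freq=1000)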
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py

@@ -38,7 +38,6 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
         :param d: the dict to convert
         :return: the created instance
         """
-        attacker_strategy = None
         try:
             attacker_strategy = MultiThresholdStoppingPolicy.from_dict(d["attacker_strategy"])
         except Exception:
@@ -59,9 +58,11 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
 
     def to_dict(self) -> Dict[str, Any]:
         """
+        Converts the object to a dict representation
+
         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["stopping_game_config"] = self.stopping_game_config.to_dict()
         d["attacker_strategy"] = self.attacker_strategy.to_dict()
         d["stopping_game_name"] = self.stopping_game_name
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_state.py

@@ -1,5 +1,6 @@
 from typing import Dict, Any
 import numpy as np
+import numpy.typing as npt
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from csle_base.json_serializable import JSONSerializable
 
@@ -9,7 +10,7 @@ class StoppingGameState(JSONSerializable):
     Represents the state of the optimal stopping game
     """
 
-    def __init__(self, b1: np.
+    def __init__(self, b1: npt.NDArray[np.float_], L: int) -> None:
         """
         Intializes the state
 
@@ -34,13 +35,13 @@ class StoppingGameState(JSONSerializable):
         self.s = StoppingGameUtil.sample_initial_state(b1=self.b1)
         self.b = self.b1.copy()
 
-    def attacker_observation(self) ->
+    def attacker_observation(self) -> npt.NDArray[Any]:
         """
         :return: the attacker's observation
         """
         return np.array([self.l, self.b[1], self.s])
 
-    def defender_observation(self) ->
+    def defender_observation(self) -> npt.NDArray[Any]:
         """
         :return: the defender's observation
         """
@@ -69,9 +70,11 @@ class StoppingGameState(JSONSerializable):
 
     def to_dict(self) -> Dict[str, Any]:
        """
+        Converts the object to a dict representation
+
         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["L"] = self.L
         d["b1"] = list(self.b1)
         d["b"] = list(self.b)
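The new annotations make explicit that both observation methods return numpy arrays. A small usage sketch under that assumption (illustrative only; the defender observation layout is inferred from the d_obs[1] = b[1] assignment in the environment diff further down):

from gym_csle_stopping_game.dao.stopping_game_state import StoppingGameState
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

state = StoppingGameState(b1=StoppingGameUtil.b1(), L=3)
a_obs = state.attacker_observation()   # np.array([l, b[1], s]) per the hunk above
d_obs = state.defender_observation()   # numpy array; index 1 holds the belief b[1]
print(a_obs, d_obs)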
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_env.py

@@ -1,5 +1,6 @@
-from typing import Tuple, Dict,
+from typing import Tuple, Dict, List, Any
 import numpy as np
+import numpy.typing as npt
 import time
 import math
 import csle_common.constants.constants as constants
@@ -32,6 +33,11 @@ class StoppingGameEnv(BaseEnv):
     """
 
     def __init__(self, config: StoppingGameConfig):
+        """
+        Initializes the environment
+
+        :param config: the environment configuration
+        """
         self.config = config
 
         # Initialize environment state
@@ -54,15 +60,15 @@ class StoppingGameEnv(BaseEnv):
         }
 
         # Setup traces
-        self.traces = []
+        self.traces: List[SimulationTrace] = []
         self.trace = SimulationTrace(simulation_env=self.config.env_name)
 
         # Reset
         self.reset()
         super().__init__()
 
-    def step(self, action_profile: Tuple[int, Tuple[
-        -> Tuple[Tuple[
+    def step(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]]) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
@@ -76,7 +82,7 @@ class StoppingGameEnv(BaseEnv):
         assert pi2.shape[0] == len(self.config.S)
         assert pi2.shape[1] == len(self.config.A1)
         done = False
-        info = {}
+        info: Dict[str, Any] = {}
 
         # Compute r, s', b',o'
         r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -129,8 +135,8 @@ class StoppingGameEnv(BaseEnv):
 
         return (defender_obs, attacker_obs), (r, -r), done, done, info
 
-    def step_test(self, action_profile: Tuple[int, Tuple[
-        -> Tuple[Tuple[
+    def step_test(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]], sample_Z) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
@@ -144,7 +150,7 @@ class StoppingGameEnv(BaseEnv):
         assert pi2.shape[0] == len(self.config.S)
         assert pi2.shape[1] == len(self.config.A1)
         done = False
-        info = {}
+        info: Dict[str, Any] = {}
 
         # Compute r, s', b',o'
         r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -196,10 +202,18 @@ class StoppingGameEnv(BaseEnv):
 
         return (defender_obs, attacker_obs), (r, -r), done, info
 
-    def step_trace(self, trace: EmulationTrace, a1: int, pi2:
-        -> Tuple[Tuple[
+    def step_trace(self, trace: EmulationTrace, a1: int, pi2: npt.NDArray[Any]) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
+        """
+        Utility function for stepping a given trace
+
+        :param trace: the trace to step
+        :param a1: the action to step with
+        :param pi2: the policy of the attacker
+        :return: the result of the step
+        """
         done = False
-        info = {}
+        info: Dict[str, Any] = {}
         if (self.state.t - 1) < len(trace.attacker_actions):
             a2_emulation_action = trace.attacker_actions[self.state.t - 1]
             a2 = 0
@@ -259,13 +273,16 @@ class StoppingGameEnv(BaseEnv):
         if not done:
             self.trace.attacker_observations.append(attacker_obs)
             self.trace.defender_observations.append(defender_obs)
-
-        # Populate info
         info = self._info(info)
-
         return (defender_obs, attacker_obs), (r, -r), done, info
 
     def mean(self, prob_vector):
+        """
+        Utility function for getting the mean of a vector
+
+        :param prob_vector: the vector to take the mean of
+        :return: the mean
+        """
         m = 0
         for i in range(len(prob_vector)):
             m += prob_vector[i] * i
@@ -284,9 +301,10 @@ class StoppingGameEnv(BaseEnv):
         else:
             return 1 - (min(10, (first_stop - (intrusion_start + 1))) / 2) / 10
 
-    def _info(self, info) -> Dict[str,
+    def _info(self, info: Dict[str, Any]) -> Dict[str, Any]:
         """
         Adds the cumulative reward and episode length to the info dict
+
         :param info: the info dict to update
         :return: the updated info dict
         """
@@ -346,7 +364,8 @@ class StoppingGameEnv(BaseEnv):
             defender_baseline_stop_on_first_alert_return
         return info
 
-    def reset(self, seed: int = 0, soft: bool = False)
+    def reset(self, seed: int = 0, soft: bool = False) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>
 
@@ -361,7 +380,7 @@ class StoppingGameEnv(BaseEnv):
         defender_obs = self.state.defender_observation()
         self.trace.attacker_observations.append(attacker_obs)
         self.trace.defender_observations.append(defender_obs)
-        info = {}
+        info: Dict[str, Any] = {}
         return (defender_obs, attacker_obs), info
 
     @staticmethod
@@ -371,6 +390,18 @@ class StoppingGameEnv(BaseEnv):
                              emulation_env_config: EmulationEnvConfig,
                              simulation_env_config: SimulationEnvConfig
                              ) -> List[EmulationSimulationTrace]:
+        """
+        Utility function for evaluating a strategy profile in the emulation environment
+
+        :param env: the environment to use for evaluation
+        :param n_episodes: the number of evaluation episodes
+        :param intrusion_seq: the intrusion sequence for the evaluation (sequence of attacker actions)
+        :param defender_policy: the defender policy for the evaluation
+        :param attacker_policy: the attacker policy for the evaluation
+        :param emulation_env_config: configuration of the emulation environment for the evaluation
+        :param simulation_env_config: configuration of the simulation environment for the evaluation
+        :return: traces with the evaluation results
+        """
         logger = Logger.__call__().get_logger()
         traces = []
         s = EmulationEnvState(emulation_env_config=emulation_env_config)
@@ -379,7 +410,7 @@ class StoppingGameEnv(BaseEnv):
             done = False
             defender_obs_space = simulation_env_config.joint_observation_space_config.observation_spaces[0]
             b = env.state.b1
-            o = env.reset()
+            o, _ = env.reset()
             (d_obs, a_obs) = o
             t = 0
             s.reset()
@@ -390,7 +421,7 @@ class StoppingGameEnv(BaseEnv):
             while not done:
                 a1 = defender_policy.action(d_obs)
                 a2 = attacker_policy.action(a_obs)
-                o, r, done, info = env.step((a1, a2))
+                o, r, done, info, _ = env.step((a1, a2))
                 (d_obs, a_obs) = o
                 r_1, r_2 = r
                 logger.debug(f"a1:{a1}, a2:{a2}, d_obs:{d_obs}, a_obs:{a_obs}, r:{r}, done:{done}, info: {info}")
@@ -419,12 +450,12 @@ class StoppingGameEnv(BaseEnv):
                              f"{defender_obs_space.observation_id_to_observation_vector_inv}")
                 logger.debug(f"observation_id_to_observation_vector_inv:"
                              f"{o_components_str in defender_obs_space.observation_id_to_observation_vector_inv}")
+                emulation_o = 0
                 if o_components_str in defender_obs_space.observation_id_to_observation_vector_inv:
-
-
-
-
-                    b = StoppingGameUtil.next_belief(o=o, a1=a1, b=b, pi2=a2, config=env.config, l=env.state.l, a2=a2)
+                    emulation_o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
+                    logger.debug(f"o:{emulation_o}")
+                    b = StoppingGameUtil.next_belief(o=emulation_o, a1=a1, b=b, pi2=a2, config=env.config,
+                                                     l=env.state.l, a2=a2)
                 d_obs[1] = b[1]
                 a_obs[1] = b[1]
                 logger.debug(f"b:{b}")
@@ -435,7 +466,7 @@ class StoppingGameEnv(BaseEnv):
             simulation_trace.infos.append(info)
             simulation_trace.states.append(s)
             simulation_trace.beliefs.append(b[1])
-            simulation_trace.infrastructure_metrics.append(
+            simulation_trace.infrastructure_metrics.append(emulation_o)
 
             em_sim_trace = EmulationSimulationTrace(emulation_trace=emulation_trace, simulation_trace=simulation_trace)
             MetastoreFacade.save_emulation_simulation_trace(em_sim_trace)
@@ -527,10 +558,10 @@ class StoppingGameEnv(BaseEnv):
             stage_policy = []
             for s in self.config.S:
                 if s != 2:
-                    dist = [0, 0]
-                    dist[a2] = 1
+                    dist = [0.0, 0.0]
+                    dist[a2] = 1.0
                     stage_policy.append(dist)
                 else:
                     stage_policy.append([0.5, 0.5])
-
-            _, _, done, _ = self.step(action_profile=(a1, (
+            pi2 = np.array(stage_policy)
+            _, _, done, _, _ = self.step(action_profile=(a1, (pi2, a2)))
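The manual-play branch above shows the shape that step() now expects for the attacker side: a stage policy (one distribution over continue/stop per state) together with the sampled action, and a gymnasium-style five-element return value. A self-contained rollout sketch along the same lines; the config object is assumed to come from a construction like the one sketched earlier, and the defender's random action choice is a placeholder:

import numpy as np
from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil


def random_rollout(env: StoppingGameEnv) -> float:
    """Plays one episode with random defender stop/continue decisions; illustrative only."""
    (defender_obs, attacker_obs), info = env.reset()          # reset() returns (obs, info)
    pi2 = np.array([[0.9, 0.1], [0.9, 0.1], [0.5, 0.5]])      # attacker stage policy, |S| x 2
    cumulative_defender_reward, done = 0.0, False
    while not done:
        a1 = int(np.random.choice([0, 1]))                    # defender: continue or stop
        a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=env.state.s)
        obs, (r1, r2), terminated, truncated, info = env.step(action_profile=(a1, (pi2, a2)))
        (defender_obs, attacker_obs) = obs
        cumulative_defender_reward += r1
        done = terminated or truncated
    return cumulative_defender_reward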
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py

@@ -1,6 +1,6 @@
 from typing import Tuple, List, Union, Dict, Any
-import gymnasium as gym
 import numpy as np
+import numpy.typing as npt
 import torch
 import math
 from csle_common.dao.simulation_config.base_env import BaseEnv
@@ -9,6 +9,7 @@ from gym_csle_stopping_game.dao.stopping_game_attacker_mdp_config import Stoppin
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 import gym_csle_stopping_game.constants.constants as env_constants
+from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 
 
 class StoppingGameMdpAttackerEnv(BaseEnv):
@@ -23,7 +24,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         :param config: the configuration of the environment
         """
         self.config = config
-        self.stopping_game_env =
+        self.stopping_game_env: StoppingGameEnv = StoppingGameEnv(config=self.config.stopping_game_config)
 
         # Setup spaces
         self.observation_space = self.config.stopping_game_config.attacker_observation_space()
@@ -33,41 +34,47 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.static_defender_strategy = self.config.defender_strategy
 
         # Setup Config
-        self.viewer = None
+        self.viewer: Union[None, Any] = None
         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': 50  # Video rendering speed
         }
 
-        self.latest_defender_obs = None
-        self.latest_attacker_obs = None
-        self.model = None
+        self.latest_defender_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+        self.latest_attacker_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+        self.model: Union[None, Any] = None
 
         # Reset
         self.reset()
         super().__init__()
 
-    def step(self, pi2: Union[
-        -> Tuple[
+    def step(self, pi2: Union[npt.NDArray[Any], int, float, np.int_, np.float_]) \
+            -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
         :param pi2: attacker stage policy
         :return: (obs, reward, terminated, truncated, info)
         """
-        if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is
-                or type(pi2) is np.float64:
+        if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is np.float64:
             a2 = pi2
-
+            if self.latest_attacker_obs is None:
+                raise ValueError("Attacker observation is None")
+            pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs), a2=int(a2))
         else:
             if self.model is not None:
-
+                if self.latest_attacker_obs is None:
+                    raise ValueError("Attacker observation is None")
+                pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
                 a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
             else:
                 pi2 = np.array(pi2)
-
-
-
+                try:
+                    if self.latest_attacker_obs is None:
+                        raise ValueError("Attacker observation is None")
+                    pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
+                except Exception:
+                    pass
                 a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
 
         # a2 = pi2
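As the reworked branching shows, StoppingGameMdpAttackerEnv.step accepts either a raw attacker action (int/float) or a full stage-policy array, and derives the missing piece from the latest attacker observation. A short sketch of both call styles; the env object is assumed to be already constructed from a StoppingGameAttackerMdpConfig:

import numpy as np
from gym_csle_stopping_game.envs.stopping_game_mdp_attacker_env import StoppingGameMdpAttackerEnv


def demo_step_styles(env: StoppingGameMdpAttackerEnv) -> None:
    """Illustrates the two accepted pi2 argument styles; not part of the package."""
    attacker_obs, info = env.reset()
    # Style 1: a raw action index; the env reconstructs the stage policy from the latest observation
    attacker_obs, reward, terminated, truncated, info = env.step(pi2=1)
    # Style 2: an explicit |S| x |A2| stage policy array
    # (it may be recomputed from the attacker model if one has been set via set_model)
    pi2 = np.array([[0.9, 0.1], [0.9, 0.1], [0.5, 0.5]])
    attacker_obs, reward, terminated, truncated, info = env.step(pi2=pi2)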
@@ -83,7 +90,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         a1 = self.static_defender_strategy.action(o=self.latest_defender_obs)
 
         # Step the game
-        o, r, d, _, info = self.stopping_game_env.step((a1, (pi2, a2)))
+        o, r, d, _, info = self.stopping_game_env.step((int(a1), (pi2, int(a2))))
         self.latest_defender_obs = o[0]
         self.latest_attacker_obs = o[1]
         attacker_obs = o[1]
@@ -94,7 +101,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
 
         return attacker_obs, r[1], d, d, info
 
-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[
+    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>
 
@@ -104,7 +111,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.latest_defender_obs = o[0]
         self.latest_attacker_obs = o[1]
         attacker_obs = o[1]
-        info = {}
+        info: Dict[str, Any] = {}
         return attacker_obs, info
 
     def set_model(self, model) -> None:
@@ -116,7 +123,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         """
         self.model = model
 
-    def calculate_stage_policy(self, o: List, a2: int = 0) ->
+    def calculate_stage_policy(self, o: List[Any], a2: int = 0) -> npt.NDArray[Any]:
         """
         Calculates the stage policy of a given model and observation
 
@@ -127,15 +134,14 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
             stage_policy = []
             for s in self.config.stopping_game_config.S:
                 if s != 2:
-                    dist = [0, 0]
-                    dist[a2] = 1
+                    dist = [0.0, 0.0]
+                    dist[a2] = 1.0
                     stage_policy.append(dist)
                 else:
                     stage_policy.append([0.5, 0.5])
             return np.array(stage_policy)
         if isinstance(self.model, MixedMultiThresholdStoppingPolicy):
-
-            return stage_policy
+            return np.array(self.model.stage_policy(o=o))
         else:
             b1 = o[1]
             l = int(o[0])
@@ -146,18 +152,19 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
                 stage_policy.append(self._get_attacker_dist(obs=o))
             else:
                 stage_policy.append([0.5, 0.5])
-
-        return stage_policy
+        return np.array(stage_policy)
 
-    def _get_attacker_dist(self, obs: List) -> List:
+    def _get_attacker_dist(self, obs: List[Any]) -> List[float]:
         """
         Utility function for getting the attacker's action distribution based on a given observation
 
         :param obs: the given observation
         :return: the action distribution
         """
-
-
+        np_obs = np.array([obs])
+        if self.model is None:
+            raise ValueError("Model is None")
+        actions, values, log_prob = self.model.policy.forward(obs=torch.tensor(np_obs).to(self.model.device))
         action = actions[0]
         if action == 1:
             stop_prob = math.exp(log_prob)
@@ -211,7 +218,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         Closes the viewer (cleanup)
         :return: None
         """
-        if self.viewer:
+        if self.viewer is not None:
             self.viewer.close()
             self.viewer = None
 
@@ -244,4 +251,4 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
                 self.reset()
             else:
                 action_idx = int(raw_input)
-                _, _, done, _ = self.step(pi2=action_idx)
+                _, _, done, _, _ = self.step(pi2=action_idx)
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py

@@ -1,6 +1,6 @@
-from typing import Tuple, List, Dict, Any
-import gymnasium as gym
+from typing import Tuple, List, Dict, Any, Union
 import numpy as np
+import numpy.typing as npt
 from csle_common.dao.simulation_config.base_env import BaseEnv
 from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
@@ -27,7 +27,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         :param attacker_strategy: the strategy of the static attacker
         """
         self.config = config
-        self.stopping_game_env =
+        self.stopping_game_env = StoppingGameEnv(config=self.config.stopping_game_config)
 
         # Setup spaces
         self.observation_space = self.config.stopping_game_config.defender_observation_space()
@@ -37,18 +37,18 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         self.static_attacker_strategy = self.config.attacker_strategy
 
         # Setup Config
-        self.viewer = None
+        self.viewer: Union[None, Any] = None
         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': 50  # Video rendering speed
         }
 
-        self.latest_attacker_obs = None
+        self.latest_attacker_obs: Union[None, npt.NDArray[Any]] = None
         # Reset
         self.reset()
         super().__init__()
 
-    def step(self, a1: int) -> Tuple[
+    def step(self, a1: int) -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
@@ -66,7 +66,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
 
         return defender_obs, r[0], d, d, info
 
-    def step_test(self, a1: int, sample_Z) -> Tuple[
+    def step_test(self, a1: int, sample_Z) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
@@ -84,7 +84,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
 
         return defender_obs, r[0], d, info
 
-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[
+    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>
 
@@ -93,7 +93,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         o, _ = self.stopping_game_env.reset()
         self.latest_attacker_obs = o[1]
         defender_obs = o[0]
-        dict = {}
+        dict: Dict[str, Any] = {}
         return defender_obs, dict
 
     def render(self, mode: str = 'human'):
@@ -105,7 +105,14 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         """
         raise NotImplementedError("Rendering is not implemented for this environment")
 
-    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[
+    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
+        """
+        Utility method for stopping a pre-recorded trace
+
+        :param trace: the trace to step
+        :param a1: the action to step with
+        :return: the result of the step according to the trace
+        """
         pi2 = np.array(self.static_attacker_strategy.stage_policy(self.latest_attacker_obs))
         o, r, d, info = self.stopping_game_env.step_trace(trace=trace, a1=a1, pi2=pi2)
         self.latest_attacker_obs = o[1]
@@ -118,6 +125,17 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
                              defender_policy: Policy,
                              emulation_env_config: EmulationEnvConfig, simulation_env_config: SimulationEnvConfig) \
             -> List[EmulationSimulationTrace]:
+        """
+        Utility function for evaluating policies in the emulation environment
+
+        :param env: the environment to use for evaluation
+        :param n_episodes: the number of episodes to use for evaluation
+        :param intrusion_seq: the sequence of intrusion actions to use for evaluation
+        :param defender_policy: the defender policy to use for evaluation
+        :param emulation_env_config: the configuration of the emulation environment to use for evaluation
+        :param simulation_env_config: the configuration of the simulation environment to use for evaluation
+        :return: traces with the evaluation results
+        """
         return StoppingGameEnv.emulation_evaluation(
             env=env.stopping_game_env, n_episodes=n_episodes, intrusion_seq=intrusion_seq,
             defender_policy=defender_policy, attacker_policy=env.static_attacker_strategy,
@@ -160,7 +178,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         Closes the viewer (cleanup)
         :return: None
         """
-        if self.viewer:
+        if self.viewer is not None:
             self.viewer.close()
             self.viewer = None
 
@@ -193,4 +211,4 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
                 self.reset()
             else:
                 action_idx = int(raw_input)
-                _, _, done, _ = self.step(
+                _, _, done, _, _ = self.step(a1=action_idx)
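The defender-side environment follows the same gymnasium conventions after these changes: reset() returns (observation, info) and step() returns a five-element tuple. A minimal interaction sketch; the env is assumed to be built from a StoppingGameDefenderPomdpConfig:

from gym_csle_stopping_game.envs.stopping_game_pomdp_defender_env import StoppingGamePomdpDefenderEnv


def defender_episode(env: StoppingGamePomdpDefenderEnv) -> float:
    """One episode where the defender stops at every opportunity (a1=1); illustrative only."""
    defender_obs, info = env.reset()
    total, done = 0.0, False
    while not done:
        defender_obs, reward, terminated, truncated, info = env.step(a1=1)
        total += reward
        done = terminated or truncated
    return total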
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/util/stopping_game_util.py

@@ -1,5 +1,6 @@
-from typing import List, Dict, Tuple
+from typing import List, Dict, Tuple, Any
 import numpy as np
+import numpy.typing as npt
 from scipy.stats import betabinom
 from csle_common.dao.system_identification.emulation_statistics import EmulationStatistics
 from csle_common.dao.simulation_config.observation_space_config import ObservationSpaceConfig
@@ -14,26 +15,37 @@ class StoppingGameUtil:
     """
 
     @staticmethod
-    def b1() -> np.
+    def b1() -> npt.NDArray[np.int_]:
        """
+        Gets the initial belief
+
         :return: the initial belief
         """
         return np.array([1, 0, 0])
 
     @staticmethod
     def state_space():
+        """
+        Gets the state space
+
+        :return: the state space of the game
+        """
         return np.array([0, 1, 2])
 
     @staticmethod
-    def defender_actions() -> np.
+    def defender_actions() -> npt.NDArray[np.int_]:
         """
+        Gets the action space of the defender
+
         :return: the action space of the defender
         """
         return np.array([0, 1])
 
     @staticmethod
-    def attacker_actions() -> np.
+    def attacker_actions() -> npt.NDArray[np.int_]:
         """
+        Gets the action space of the attacker
+
         :return: the action space of the attacker
         """
         return np.array([0, 1])
@@ -44,13 +56,15 @@ class StoppingGameUtil:
         Returns the observation space of size n
 
         :param n: the maximum observation
-        :return:
+        :return: the observation space
         """
         return np.array(list(range(n + 1)))
 
     @staticmethod
-    def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) ->
+    def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> npt.NDArray[Any]:
         """
+        Gets the reward tensor
+
         :param R_SLA: the R_SLA constant
         :param R_INT: the R_INT constant
         :param R_COST: the R_COST constant
@@ -76,12 +90,13 @@ class StoppingGameUtil:
             ]
         ]
         R_l.append(R)
-
-        return R
+        return np.array(R_l)
 
     @staticmethod
-    def transition_tensor(L: int, p: float) ->
+    def transition_tensor(L: int, p: float) -> npt.NDArray[Any]:
         """
+        Gets the transition tensor
+
         :param L: the maximum number of stop actions
         :return: a |L|x|A1|x|A2||S|^2 tensor
         """
@@ -156,15 +171,14 @@ class StoppingGameUtil:
             ]
         ]
         T_l.append(T)
-
-        return T
+        return np.array(T_l)
 
     @staticmethod
     def observation_tensor_from_emulation_statistics(emulation_statistic: EmulationStatistics,
                                                      observation_space_defender: ObservationSpaceConfig,
                                                      joint_action_space: JointActionSpaceConfig,
                                                      state_space: StateSpaceConfig) \
-            -> Tuple[
+            -> Tuple[npt.NDArray[Any], Dict[str, List[Any]]]:
         """
         Returns an observation tensor based on measured emulation statistics
 
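The fix above is worth calling out: reward_tensor and transition_tensor previously returned only the last per-level slice (R / T) instead of the stacked array over all L levels, which is what StoppingGameEnv.step indexes. A quick shape check under arbitrary placeholder values for L and p (not part of the package):

from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

L = 3
T = StoppingGameUtil.transition_tensor(L=L, p=0.01)
R = StoppingGameUtil.reward_tensor(R_SLA=1, R_INT=-10, R_COST=-5, L=L, R_ST=20)
# StoppingGameEnv.step indexes these as T[l - 1][a1][a2][s][s_prime] and R[l - 1][a1][a2][s],
# so the leading dimension must be L rather than a single slice.
assert T.shape[0] == L and R.shape[0] == L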
@@ -174,9 +188,9 @@ class StoppingGameUtil:
         :param state_space: the state space
         :return: a |A1|x|A2|x|S|x|O| tensor
         """
-        intrusion_severe_alerts_probabilities = []
-        intrusion_warning_alerts_probabilities = []
-        intrusion_login_attempts_probabilities = []
+        intrusion_severe_alerts_probabilities: List[float] = []
+        intrusion_warning_alerts_probabilities: List[float] = []
+        intrusion_login_attempts_probabilities: List[float] = []
         norm = sum(emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"].values())
         for severe_alert_obs in observation_space_defender.component_observations["severe_alerts"]:
             count = emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"][severe_alert_obs.id]
@@ -214,14 +228,14 @@ class StoppingGameUtil:
         login_attempts_a1_a2_s_o_dist = []
         for a2 in range(len(joint_action_space.action_spaces[1].actions)):
             a2_s_o_dist = []
-            severe_alerts_a2_s_o_dist = []
-            warning_alerts_a2_s_o_dist = []
-            login_attempts_a2_s_o_dist = []
+            severe_alerts_a2_s_o_dist: List[List[float]] = []
+            warning_alerts_a2_s_o_dist: List[List[float]] = []
+            login_attempts_a2_s_o_dist: List[List[float]] = []
             for s in range(len(state_space.states)):
                 s_o_dist = []
-                severe_alerts_s_o_dist = []
-                warning_alerts_s_o_dist = []
-                login_attempts_s_o_dist = []
+                severe_alerts_s_o_dist: List[float] = []
+                warning_alerts_s_o_dist: List[float] = []
+                login_attempts_s_o_dist: List[float] = []
                 for o in range(len(observation_space_defender.observations)):
                     obs_vector = observation_space_defender.observation_id_to_observation_id_vector[o]
                     if s == 0:
@@ -241,8 +255,8 @@ class StoppingGameUtil:
                     s_o_dist.append(p)
                 a2_s_o_dist.append(s_o_dist)
                 severe_alerts_a2_s_o_dist.append(severe_alerts_s_o_dist)
-                warning_alerts_a2_s_o_dist.append(
-                login_attempts_a2_s_o_dist.append(
+                warning_alerts_a2_s_o_dist.append(warning_alerts_s_o_dist)
+                login_attempts_a2_s_o_dist.append(login_attempts_s_o_dist)
             a1_a2_s_o_dist.append(a2_s_o_dist)
             severe_alerts_a1_a2_s_o_dist.append(severe_alerts_a2_s_o_dist)
             warning_alerts_a1_a2_s_o_dist.append(warning_alerts_a2_s_o_dist)
@@ -301,7 +315,7 @@ class StoppingGameUtil:
         return Z
 
     @staticmethod
-    def sample_next_state(T:
+    def sample_next_state(T: npt.NDArray[Any], l: int, s: int, a1: int, a2: int, S: npt.NDArray[np.int_]) -> int:
         """
         Samples the next state
 
@@ -316,22 +330,20 @@ class StoppingGameUtil:
         state_probs = []
         for s_prime in S:
             state_probs.append(T[l - 1][a1][a2][s][s_prime])
-
-        return s_prime
+        return int(np.random.choice(np.arange(0, len(S)), p=state_probs))
 
     @staticmethod
-    def sample_initial_state(b1: np.
+    def sample_initial_state(b1: npt.NDArray[np.float_]) -> int:
         """
         Samples the initial state
 
         :param b1: the initial belief
         :return: s1
         """
-
-        return s1
+        return int(np.random.choice(np.arange(0, len(b1)), p=b1))
 
     @staticmethod
-    def sample_next_observation(Z:
+    def sample_next_observation(Z: npt.NDArray[Any], s_prime: int, O: npt.NDArray[np.int_]) -> int:
         """
         Samples the next observation
 
@@ -351,7 +363,7 @@ class StoppingGameUtil:
         return int(o)
 
     @staticmethod
-    def bayes_filter(s_prime: int, o: int, a1: int, b: np.
+    def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any], l: int,
                      config: StoppingGameConfig) -> float:
         """
         A Bayesian filter to compute the belief of player 1
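The sampling fixes above all reduce to the same idiom: drawing an index from a categorical distribution with numpy (the old code returned the loop variable or an undefined name instead of actually sampling). A standalone sketch of that idiom (not package code):

import numpy as np


def sample_categorical(probs: np.ndarray) -> int:
    """Draws an index in 0..len(probs)-1 with the given probabilities (they must sum to 1)."""
    return int(np.random.choice(np.arange(0, len(probs)), p=probs))


# e.g. the initial belief b1 = [1, 0, 0] always yields state 0
s1 = sample_categorical(np.array([1.0, 0.0, 0.0]))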
@@ -389,7 +401,7 @@ class StoppingGameUtil:
         return b_prime_s_prime
 
     @staticmethod
-    def p_o_given_b_a1_a2(o: int, b: List, a1: int, a2: int, config: StoppingGameConfig) -> float:
+    def p_o_given_b_a1_a2(o: int, b: List[float], a1: int, a2: int, config: StoppingGameConfig) -> float:
         """
         Computes P[o|a,b]
 
@@ -408,8 +420,8 @@ class StoppingGameUtil:
         return prob
 
     @staticmethod
-    def next_belief(o: int, a1: int, b: np.
-                    a2: int = 0, s: int = 0) -> np.
+    def next_belief(o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any],
+                    config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float_]:
         """
         Computes the next belief using a Bayesian filter
 
@@ -434,7 +446,7 @@ class StoppingGameUtil:
         return b_prime
 
     @staticmethod
-    def sample_attacker_action(pi2:
+    def sample_attacker_action(pi2: npt.NDArray[Any], s: int) -> int:
         """
         Samples the attacker action
 
@@ -442,5 +454,4 @@ class StoppingGameUtil:
         :param s: the game state
         :return: a2 (the attacker action
         """
-
-        return a2
+        return int(np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s]))
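next_belief wraps the Bayesian filter above: given the current belief b, the last defender action a1, the attacker stage policy pi2, the stops-left counter l and the new observation o, it returns the updated belief over the three states. A hedged usage sketch; the config object is assumed to exist (see the construction sketch earlier) and the observation value and l are arbitrary placeholders:

import numpy as np
from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil


def belief_update_example(config: StoppingGameConfig) -> np.ndarray:
    """Performs one belief update starting from the initial belief; illustrative only."""
    b = StoppingGameUtil.b1()                               # initial belief [1, 0, 0]
    pi2 = np.array([[0.9, 0.1], [0.9, 0.1], [0.5, 0.5]])    # attacker stage policy
    b_prime = StoppingGameUtil.next_belief(o=5, a1=0, b=b, pi2=pi2, config=config, l=3, a2=0)
    return b_prime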
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-csle-stopping-game
-Version: 0.2.18
+Version: 0.2.20
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com
@@ -15,6 +15,3 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Intended Audience :: Science/Research
 Requires-Python: >=3.8
 Provides-Extra: testing
-
-UNKNOWN
-
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/SOURCES.txt

@@ -21,4 +21,5 @@ src/gym_csle_stopping_game/envs/stopping_game_env.py
 src/gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py
 src/gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py
 src/gym_csle_stopping_game/util/__init__.py
-src/gym_csle_stopping_game/util/stopping_game_util.py
+src/gym_csle_stopping_game/util/stopping_game_util.py
+tests/test_stopping_game_util.py
gym_csle_stopping_game-0.2.20/src/gym_csle_stopping_game.egg-info/requires.txt

@@ -0,0 +1,28 @@
+gymnasium>=0.27.1
+csle-base>=0.2.20
+csle-common>=0.2.20
+csle-attacker>=0.2.20
+csle-defender>=0.2.20
+csle-collector>=0.2.9
+
+[testing]
+pytest>=6.0
+pytest-cov>=2.0
+pytest-mock>=3.6.0
+pytest-grpc>=0.8.0
+mypy>=1.4.1
+mypy-extensions>=1.0.0
+mypy-protobuf>=3.5.0
+types-PyYAML>=6.0.12.11
+types-paramiko>=3.2.0.0
+types-protobuf>=4.23.0.3
+types-requests>=2.31.0.1
+types-urllib3>=1.26.25.13
+flake8>=3.9
+flake8-rst-docstrings>=0.3.0
+tox>=3.24
+sphinx>=5.3.0
+sphinxcontrib-napoleon>=0.7
+sphinx-rtd-theme>=1.1.1
+twine>=4.0.2
+build>=0.10.0
gym_csle_stopping_game-0.2.20/tests/test_stopping_game_util.py

@@ -0,0 +1,19 @@
+import logging
+import pytest
+from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
+
+
+class TestStoppingGameUtilSuite(object):
+    """
+    Test suite for stopping_game_util.py
+    """
+
+    pytest.logger = logging.getLogger("stopping_game_util_tests")
+
+    def test_b1(self) -> None:
+        """
+        Tests the b1 function
+
+        :return: None
+        """
+        assert sum(StoppingGameUtil.b1()) == 1
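The new test suite is minimal: a single check that the initial belief sums to one. If you want to extend it locally, two more checks in the same style that follow directly from the util functions shown in this diff (this extra suite is illustrative and not part of the release):

from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil


class TestStoppingGameUtilExtraSuite(object):
    """Additional checks in the style of the released test suite; illustrative only."""

    def test_action_spaces(self) -> None:
        """Both players have the two actions continue (0) and stop (1)."""
        assert list(StoppingGameUtil.defender_actions()) == [0, 1]
        assert list(StoppingGameUtil.attacker_actions()) == [0, 1]

    def test_state_space(self) -> None:
        """The game has the three states 0 (no intrusion), 1 (intrusion), 2 (terminal)."""
        assert list(StoppingGameUtil.state_space()) == [0, 1, 2]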
gym_csle_stopping_game-0.2.18/src/gym_csle_stopping_game/__version__.py

@@ -1 +0,0 @@
-__version__ = '0.2.18'
gym_csle_stopping_game-0.2.18/src/gym_csle_stopping_game.egg-info/requires.txt

@@ -1,20 +0,0 @@
-gymnasium>=0.27.1
-csle-base>=0.2.18
-csle-common>=0.2.18
-csle-attacker>=0.2.18
-csle-defender>=0.2.18
-csle-collector>=0.2.9
-
-[testing]
-pytest>=6.0
-pytest-cov>=2.0
-pytest-mock>=3.6.0
-pytest-grpc>=0.8.0
-mypy>=1.3.0
-flake8>=3.9
-tox>=3.24
-sphinx>=5.3.0
-sphinxcontrib-napoleon>=0.7
-sphinx-rtd-theme>=1.1.1
-twine>=4.0.2
-build>=0.10.0

The remaining files listed above with +0 -0 are unchanged between the two versions.