gym-csle-stopping-game: 0.2.19-py3-none-any.whl → 0.2.20-py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of gym-csle-stopping-game might be problematic.

gym_csle_stopping_game/__version__.py
@@ -1 +1 @@
-__version__ = '0.2.19'
+__version__ = '0.2.20'

gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py
@@ -37,7 +37,6 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
         :param d: the dict to convert
         :return: the created instance
         """
-        defender_strategy = None
         try:
             defender_strategy = MultiThresholdStoppingPolicy.from_dict(d["defender_strategy"])
         except Exception:
@@ -58,7 +57,7 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):

         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["stopping_game_config"] = self.stopping_game_config.to_dict()
         d["defender_strategy"] = self.defender_strategy.to_dict()
         d["stopping_game_name"] = self.stopping_game_name

gym_csle_stopping_game/dao/stopping_game_config.py
@@ -1,6 +1,7 @@
 from typing import Dict, Any
 import gymnasium as gym
 import numpy as np
+import numpy.typing as npt
 from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig


@@ -10,8 +11,10 @@ class StoppingGameConfig(SimulationEnvInputConfig):
     """

     def __init__(self, env_name: str,
-                 T: np.ndarray, O: np.ndarray, Z: np.ndarray, R: np.ndarray, S: np.ndarray, A1: np.ndarray,
-                 A2: np.ndarray, L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int, b1: np.ndarray,
+                 T: npt.NDArray[Any], O: npt.NDArray[np.int_], Z: npt.NDArray[Any],
+                 R: npt.NDArray[Any], S: npt.NDArray[np.int_], A1: npt.NDArray[np.int_],
+                 A2: npt.NDArray[np.int_], L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int,
+                 b1: npt.NDArray[np.float_],
                  save_dir: str, checkpoint_traces_freq: int, gamma: float = 1) -> None:
         """
         Initializes the DTO
@@ -59,7 +62,7 @@ class StoppingGameConfig(SimulationEnvInputConfig):

         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["T"] = list(self.T.tolist())
         d["O"] = list(self.O.tolist())
         d["Z"] = list(self.Z.tolist())

gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py
@@ -38,7 +38,6 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
         :param d: the dict to convert
         :return: the created instance
         """
-        attacker_strategy = None
         try:
             attacker_strategy = MultiThresholdStoppingPolicy.from_dict(d["attacker_strategy"])
         except Exception:
@@ -63,7 +62,7 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):

         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["stopping_game_config"] = self.stopping_game_config.to_dict()
         d["attacker_strategy"] = self.attacker_strategy.to_dict()
         d["stopping_game_name"] = self.stopping_game_name

gym_csle_stopping_game/dao/stopping_game_state.py
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 import numpy as np
+import numpy.typing as npt
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from csle_base.json_serializable import JSONSerializable


@@ -9,7 +10,7 @@ class StoppingGameState(JSONSerializable):
     Represents the state of the optimal stopping game
     """

-    def __init__(self, b1: np.ndarray, L: int) -> None:
+    def __init__(self, b1: npt.NDArray[np.float_], L: int) -> None:
         """
         Intializes the state

@@ -34,13 +35,13 @@ class StoppingGameState(JSONSerializable):
         self.s = StoppingGameUtil.sample_initial_state(b1=self.b1)
         self.b = self.b1.copy()

-    def attacker_observation(self) -> np.ndarray:
+    def attacker_observation(self) -> npt.NDArray[Any]:
         """
         :return: the attacker's observation
         """
         return np.array([self.l, self.b[1], self.s])

-    def defender_observation(self) -> np.ndarray:
+    def defender_observation(self) -> npt.NDArray[Any]:
         """
         :return: the defender's observation
         """
@@ -73,7 +74,7 @@ class StoppingGameState(JSONSerializable):

         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["L"] = self.L
         d["b1"] = list(self.b1)
         d["b"] = list(self.b)

gym_csle_stopping_game/envs/stopping_game_env.py
@@ -1,5 +1,6 @@
-from typing import Tuple, Dict, Union, List, Any
+from typing import Tuple, Dict, List, Any
 import numpy as np
+import numpy.typing as npt
 import time
 import math
 import csle_common.constants.constants as constants
@@ -59,15 +60,15 @@ class StoppingGameEnv(BaseEnv):
         }

         # Setup traces
-        self.traces = []
+        self.traces: List[SimulationTrace] = []
         self.trace = SimulationTrace(simulation_env=self.config.env_name)

         # Reset
         self.reset()
         super().__init__()

-    def step(self, action_profile: Tuple[int, Tuple[np.ndarray, int]]) \
-            -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, bool, dict]:
+    def step(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]]) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

@@ -81,7 +82,7 @@ class StoppingGameEnv(BaseEnv):
         assert pi2.shape[0] == len(self.config.S)
         assert pi2.shape[1] == len(self.config.A1)
         done = False
-        info = {}
+        info: Dict[str, Any] = {}

         # Compute r, s', b',o'
         r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -134,8 +135,8 @@ class StoppingGameEnv(BaseEnv):

         return (defender_obs, attacker_obs), (r, -r), done, done, info

-    def step_test(self, action_profile: Tuple[int, Tuple[np.ndarray, int]], sample_Z) \
-            -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, dict]:
+    def step_test(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]], sample_Z) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

@@ -149,7 +150,7 @@ class StoppingGameEnv(BaseEnv):
         assert pi2.shape[0] == len(self.config.S)
         assert pi2.shape[1] == len(self.config.A1)
         done = False
-        info = {}
+        info: Dict[str, Any] = {}

         # Compute r, s', b',o'
         r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -201,8 +202,8 @@ class StoppingGameEnv(BaseEnv):

         return (defender_obs, attacker_obs), (r, -r), done, info

-    def step_trace(self, trace: EmulationTrace, a1: int, pi2: np.ndarray) \
-            -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, dict]:
+    def step_trace(self, trace: EmulationTrace, a1: int, pi2: npt.NDArray[Any]) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
         """
         Utility function for stepping a given trace

@@ -212,7 +213,7 @@ class StoppingGameEnv(BaseEnv):
         :return: the result of the step
         """
         done = False
-        info = {}
+        info: Dict[str, Any] = {}
         if (self.state.t - 1) < len(trace.attacker_actions):
             a2_emulation_action = trace.attacker_actions[self.state.t - 1]
             a2 = 0
@@ -300,7 +301,7 @@ class StoppingGameEnv(BaseEnv):
         else:
             return 1 - (min(10, (first_stop - (intrusion_start + 1))) / 2) / 10

-    def _info(self, info) -> Dict[str, Union[float, int]]:
+    def _info(self, info: Dict[str, Any]) -> Dict[str, Any]:
         """
         Adds the cumulative reward and episode length to the info dict

@@ -363,7 +364,8 @@ class StoppingGameEnv(BaseEnv):
             defender_baseline_stop_on_first_alert_return
         return info

-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[Tuple[np.ndarray, np.ndarray], Dict[str, Any]]:
+    def reset(self, seed: int = 0, soft: bool = False) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>

@@ -378,7 +380,7 @@ class StoppingGameEnv(BaseEnv):
         defender_obs = self.state.defender_observation()
         self.trace.attacker_observations.append(attacker_obs)
         self.trace.defender_observations.append(defender_obs)
-        info = {}
+        info: Dict[str, Any] = {}
         return (defender_obs, attacker_obs), info

     @staticmethod
@@ -408,7 +410,7 @@ class StoppingGameEnv(BaseEnv):
         done = False
         defender_obs_space = simulation_env_config.joint_observation_space_config.observation_spaces[0]
         b = env.state.b1
-        o = env.reset()
+        o, _ = env.reset()
         (d_obs, a_obs) = o
         t = 0
         s.reset()
@@ -419,7 +421,7 @@ class StoppingGameEnv(BaseEnv):
         while not done:
             a1 = defender_policy.action(d_obs)
             a2 = attacker_policy.action(a_obs)
-            o, r, done, info = env.step((a1, a2))
+            o, r, done, info, _ = env.step((a1, a2))
             (d_obs, a_obs) = o
             r_1, r_2 = r
             logger.debug(f"a1:{a1}, a2:{a2}, d_obs:{d_obs}, a_obs:{a_obs}, r:{r}, done:{done}, info: {info}")
@@ -448,12 +450,12 @@ class StoppingGameEnv(BaseEnv):
                          f"{defender_obs_space.observation_id_to_observation_vector_inv}")
             logger.debug(f"observation_id_to_observation_vector_inv:"
                          f"{o_components_str in defender_obs_space.observation_id_to_observation_vector_inv}")
+            emulation_o = 0
             if o_components_str in defender_obs_space.observation_id_to_observation_vector_inv:
-                o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
-            else:
-                o = 0
-            logger.debug(f"o:{o}")
-            b = StoppingGameUtil.next_belief(o=o, a1=a1, b=b, pi2=a2, config=env.config, l=env.state.l, a2=a2)
+                emulation_o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
+            logger.debug(f"o:{emulation_o}")
+            b = StoppingGameUtil.next_belief(o=emulation_o, a1=a1, b=b, pi2=a2, config=env.config,
+                                             l=env.state.l, a2=a2)
             d_obs[1] = b[1]
             a_obs[1] = b[1]
             logger.debug(f"b:{b}")
@@ -464,7 +466,7 @@ class StoppingGameEnv(BaseEnv):
             simulation_trace.infos.append(info)
             simulation_trace.states.append(s)
             simulation_trace.beliefs.append(b[1])
-            simulation_trace.infrastructure_metrics.append(o)
+            simulation_trace.infrastructure_metrics.append(emulation_o)

         em_sim_trace = EmulationSimulationTrace(emulation_trace=emulation_trace, simulation_trace=simulation_trace)
         MetastoreFacade.save_emulation_simulation_trace(em_sim_trace)
@@ -556,10 +558,10 @@ class StoppingGameEnv(BaseEnv):
         stage_policy = []
         for s in self.config.S:
             if s != 2:
-                dist = [0, 0]
-                dist[a2] = 1
+                dist = [0.0, 0.0]
+                dist[a2] = 1.0
                 stage_policy.append(dist)
             else:
                 stage_policy.append([0.5, 0.5])
-        stage_policy = np.array(stage_policy)
-        _, _, done, _ = self.step(action_profile=(a1, (stage_policy, a2)))
+        pi2 = np.array(stage_policy)
+        _, _, done, _, _ = self.step(action_profile=(a1, (pi2, a2)))
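
Note: the call-site changes above (`o, _ = env.reset()` and the five-value unpacking of `step(...)`) follow the Gymnasium API, where `reset()` returns `(obs, info)` and `step()` returns `(obs, reward, terminated, truncated, info)`. An illustrative loop against any registered env (not package code):

    import gymnasium as gym

    env = gym.make("CartPole-v1")  # any registered env; used only for illustration
    obs, info = env.reset(seed=0)  # two values, matching `o, _ = env.reset()`
    terminated = truncated = False
    while not (terminated or truncated):
        action = env.action_space.sample()
        # five values, matching `_, _, done, _, _ = self.step(...)` above
        obs, reward, terminated, truncated, info = env.step(action)
    env.close()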

gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py
@@ -1,6 +1,6 @@
 from typing import Tuple, List, Union, Dict, Any
-import gymnasium as gym
 import numpy as np
+import numpy.typing as npt
 import torch
 import math
 from csle_common.dao.simulation_config.base_env import BaseEnv
@@ -9,6 +9,7 @@ from gym_csle_stopping_game.dao.stopping_game_attacker_mdp_config import Stoppin
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 import gym_csle_stopping_game.constants.constants as env_constants
+from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv


 class StoppingGameMdpAttackerEnv(BaseEnv):
@@ -23,7 +24,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         :param config: the configuration of the environment
         """
         self.config = config
-        self.stopping_game_env = gym.make(self.config.stopping_game_name, config=self.config.stopping_game_config)
+        self.stopping_game_env: StoppingGameEnv = StoppingGameEnv(config=self.config.stopping_game_config)

         # Setup spaces
         self.observation_space = self.config.stopping_game_config.attacker_observation_space()
@@ -33,41 +34,47 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.static_defender_strategy = self.config.defender_strategy

         # Setup Config
-        self.viewer = None
+        self.viewer: Union[None, Any] = None
         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': 50  # Video rendering speed
         }

-        self.latest_defender_obs = None
-        self.latest_attacker_obs = None
-        self.model = None
+        self.latest_defender_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+        self.latest_attacker_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+        self.model: Union[None, Any] = None

         # Reset
         self.reset()
         super().__init__()

-    def step(self, pi2: Union[List[List[float]], int, float, np.int64, float, np.float64]) \
-            -> Tuple[np.ndarray, int, bool, bool, dict]:
+    def step(self, pi2: Union[npt.NDArray[Any], int, float, np.int_, np.float_]) \
+            -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

         :param pi2: attacker stage policy
         :return: (obs, reward, terminated, truncated, info)
         """
-        if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is float \
-                or type(pi2) is np.float64:
+        if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is np.float64:
             a2 = pi2
-            pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs, a2=a2)
+            if self.latest_attacker_obs is None:
+                raise ValueError("Attacker observation is None")
+            pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs), a2=int(a2))
         else:
             if self.model is not None:
-                pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs)
+                if self.latest_attacker_obs is None:
+                    raise ValueError("Attacker observation is None")
+                pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
                 a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
             else:
                 pi2 = np.array(pi2)
-                if (not pi2.shape[0] == len(self.config.stopping_game_config.S)
-                        or pi2.shape[1] != len(self.config.stopping_game_config.A1)) and self.model is not None:
-                    pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs)
+                try:
+                    if self.latest_attacker_obs is None:
+                        raise ValueError("Attacker observation is None")
+                    pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
+                except Exception:
+                    pass
                 a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)

         # a2 = pi2
@@ -83,7 +90,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
             a1 = self.static_defender_strategy.action(o=self.latest_defender_obs)

         # Step the game
-        o, r, d, _, info = self.stopping_game_env.step((a1, (pi2, a2)))
+        o, r, d, _, info = self.stopping_game_env.step((int(a1), (pi2, int(a2))))
         self.latest_defender_obs = o[0]
         self.latest_attacker_obs = o[1]
         attacker_obs = o[1]
@@ -94,7 +101,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):

         return attacker_obs, r[1], d, d, info

-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[np.ndarray, Dict[str, Any]]:
+    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>

@@ -104,7 +111,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.latest_defender_obs = o[0]
         self.latest_attacker_obs = o[1]
         attacker_obs = o[1]
-        info = {}
+        info: Dict[str, Any] = {}
         return attacker_obs, info

     def set_model(self, model) -> None:
@@ -116,7 +123,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         """
         self.model = model

-    def calculate_stage_policy(self, o: List, a2: int = 0) -> np.ndarray:
+    def calculate_stage_policy(self, o: List[Any], a2: int = 0) -> npt.NDArray[Any]:
         """
         Calculates the stage policy of a given model and observation

@@ -127,15 +134,14 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
             stage_policy = []
             for s in self.config.stopping_game_config.S:
                 if s != 2:
-                    dist = [0, 0]
-                    dist[a2] = 1
+                    dist = [0.0, 0.0]
+                    dist[a2] = 1.0
                     stage_policy.append(dist)
                 else:
                     stage_policy.append([0.5, 0.5])
             return np.array(stage_policy)
         if isinstance(self.model, MixedMultiThresholdStoppingPolicy):
-            stage_policy = np.array(self.model.stage_policy(o=o))
-            return stage_policy
+            return np.array(self.model.stage_policy(o=o))
         else:
             b1 = o[1]
             l = int(o[0])
@@ -146,18 +152,19 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
                     stage_policy.append(self._get_attacker_dist(obs=o))
                 else:
                     stage_policy.append([0.5, 0.5])
-            stage_policy = np.array(stage_policy)
-            return stage_policy
+            return np.array(stage_policy)

-    def _get_attacker_dist(self, obs: List) -> List:
+    def _get_attacker_dist(self, obs: List[Any]) -> List[float]:
         """
         Utility function for getting the attacker's action distribution based on a given observation

         :param obs: the given observation
         :return: the action distribution
         """
-        obs = np.array([obs])
-        actions, values, log_prob = self.model.policy.forward(obs=torch.tensor(obs).to(self.model.device))
+        np_obs = np.array([obs])
+        if self.model is None:
+            raise ValueError("Model is None")
+        actions, values, log_prob = self.model.policy.forward(obs=torch.tensor(np_obs).to(self.model.device))
         action = actions[0]
         if action == 1:
             stop_prob = math.exp(log_prob)
@@ -211,7 +218,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         Closes the viewer (cleanup)
         :return: None
         """
-        if self.viewer:
+        if self.viewer is not None:
             self.viewer.close()
             self.viewer = None

@@ -244,4 +251,4 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
             self.reset()
         else:
             action_idx = int(raw_input)
-            _, _, done, _ = self.step(pi2=action_idx)
+            _, _, done, _, _ = self.step(pi2=action_idx)
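
Note: replacing `gym.make(...)` with direct construction of `StoppingGameEnv` gives the attacker env a concretely typed handle, so attributes such as `self.stopping_game_env.state` are reachable without unwrapping. With `gym.make`, Gymnasium returns the env wrapped (e.g. in `OrderEnforcing`), as this standalone sketch shows:

    import gymnasium as gym

    wrapped = gym.make("CartPole-v1")
    print(type(wrapped))            # a Gymnasium wrapper, not the concrete class
    print(type(wrapped.unwrapped))  # the underlying CartPoleEnv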

gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py
@@ -1,6 +1,6 @@
-from typing import Tuple, List, Dict, Any
-import gymnasium as gym
+from typing import Tuple, List, Dict, Any, Union
 import numpy as np
+import numpy.typing as npt
 from csle_common.dao.simulation_config.base_env import BaseEnv
 from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
@@ -27,7 +27,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         :param attacker_strategy: the strategy of the static attacker
         """
         self.config = config
-        self.stopping_game_env = gym.make(self.config.stopping_game_name, config=self.config.stopping_game_config)
+        self.stopping_game_env = StoppingGameEnv(config=self.config.stopping_game_config)

         # Setup spaces
         self.observation_space = self.config.stopping_game_config.defender_observation_space()
@@ -37,18 +37,18 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         self.static_attacker_strategy = self.config.attacker_strategy

         # Setup Config
-        self.viewer = None
+        self.viewer: Union[None, Any] = None
         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': 50  # Video rendering speed
         }

-        self.latest_attacker_obs = None
+        self.latest_attacker_obs: Union[None, npt.NDArray[Any]] = None
         # Reset
         self.reset()
         super().__init__()

-    def step(self, a1: int) -> Tuple[np.ndarray, int, bool, bool, dict]:
+    def step(self, a1: int) -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

@@ -66,7 +66,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):

         return defender_obs, r[0], d, d, info

-    def step_test(self, a1: int, sample_Z) -> Tuple[np.ndarray, int, bool, dict]:
+    def step_test(self, a1: int, sample_Z) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action

@@ -84,7 +84,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):

         return defender_obs, r[0], d, info

-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[np.ndarray, Dict[str, Any]]:
+    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>

@@ -93,7 +93,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         o, _ = self.stopping_game_env.reset()
         self.latest_attacker_obs = o[1]
         defender_obs = o[0]
-        dict = {}
+        dict: Dict[str, Any] = {}
         return defender_obs, dict

     def render(self, mode: str = 'human'):
@@ -105,7 +105,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         """
         raise NotImplementedError("Rendering is not implemented for this environment")

-    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[np.ndarray, int, bool, dict]:
+    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
         """
         Utility method for stopping a pre-recorded trace

@@ -178,7 +178,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         Closes the viewer (cleanup)
         :return: None
         """
-        if self.viewer:
+        if self.viewer is not None:
             self.viewer.close()
             self.viewer = None

@@ -211,4 +211,4 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
             self.reset()
         else:
             action_idx = int(raw_input)
-            _, _, done, _ = self.step(pi2=action_idx)
+            _, _, done, _, _ = self.step(a1=action_idx)
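
Note: besides adopting the five-value step API, the last hunk fixes a latent bug: 0.2.19 called the defender env's `step` with the keyword `pi2`, but its signature only has `a1`, so manual play would raise a `TypeError`. A standalone illustration (not package code):

    def step(a1: int):
        # stand-in for StoppingGamePomdpDefenderEnv.step
        return a1, a1

    try:
        step(pi2=1)  # 0.2.19 call style
    except TypeError as e:
        print(e)     # step() got an unexpected keyword argument 'pi2'

    print(step(a1=1))  # 0.2.20 call style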

gym_csle_stopping_game/util/stopping_game_util.py
@@ -1,5 +1,6 @@
-from typing import List, Dict, Tuple
+from typing import List, Dict, Tuple, Any
 import numpy as np
+import numpy.typing as npt
 from scipy.stats import betabinom
 from csle_common.dao.system_identification.emulation_statistics import EmulationStatistics
 from csle_common.dao.simulation_config.observation_space_config import ObservationSpaceConfig
@@ -14,7 +15,7 @@ class StoppingGameUtil:
     """

     @staticmethod
-    def b1() -> np.ndarray:
+    def b1() -> npt.NDArray[np.int_]:
         """
         Gets the initial belief

@@ -32,7 +33,7 @@ class StoppingGameUtil:
         return np.array([0, 1, 2])

     @staticmethod
-    def defender_actions() -> np.ndarray:
+    def defender_actions() -> npt.NDArray[np.int_]:
         """
         Gets the action space of the defender

@@ -41,7 +42,7 @@ class StoppingGameUtil:
         return np.array([0, 1])

     @staticmethod
-    def attacker_actions() -> np.ndarray:
+    def attacker_actions() -> npt.NDArray[np.int_]:
         """
         Gets the action space of the attacker

@@ -60,7 +61,7 @@ class StoppingGameUtil:
         return np.array(list(range(n + 1)))

     @staticmethod
-    def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> np.ndarray:
+    def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> npt.NDArray[Any]:
         """
         Gets the reward tensor

@@ -89,11 +90,10 @@ class StoppingGameUtil:
                ]
            ]
            R_l.append(R)
-        R = np.array(R_l)
-        return R
+        return np.array(R_l)

     @staticmethod
-    def transition_tensor(L: int, p: float) -> np.ndarray:
+    def transition_tensor(L: int, p: float) -> npt.NDArray[Any]:
         """
         Gets the transition tensor

@@ -171,15 +171,14 @@ class StoppingGameUtil:
                ]
            ]
            T_l.append(T)
-        T = np.array(T_l)
-        return T
+        return np.array(T_l)

     @staticmethod
     def observation_tensor_from_emulation_statistics(emulation_statistic: EmulationStatistics,
                                                      observation_space_defender: ObservationSpaceConfig,
                                                      joint_action_space: JointActionSpaceConfig,
                                                      state_space: StateSpaceConfig) \
-            -> Tuple[np.ndarray, Dict[str, List]]:
+            -> Tuple[npt.NDArray[Any], Dict[str, List[Any]]]:
         """
         Returns an observation tensor based on measured emulation statistics

@@ -189,9 +188,9 @@ class StoppingGameUtil:
         :param state_space: the state space
         :return: a |A1|x|A2|x|S|x|O| tensor
         """
-        intrusion_severe_alerts_probabilities = []
-        intrusion_warning_alerts_probabilities = []
-        intrusion_login_attempts_probabilities = []
+        intrusion_severe_alerts_probabilities: List[float] = []
+        intrusion_warning_alerts_probabilities: List[float] = []
+        intrusion_login_attempts_probabilities: List[float] = []
         norm = sum(emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"].values())
         for severe_alert_obs in observation_space_defender.component_observations["severe_alerts"]:
             count = emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"][severe_alert_obs.id]
@@ -229,14 +228,14 @@ class StoppingGameUtil:
            login_attempts_a1_a2_s_o_dist = []
            for a2 in range(len(joint_action_space.action_spaces[1].actions)):
                a2_s_o_dist = []
-               severe_alerts_a2_s_o_dist = []
-               warning_alerts_a2_s_o_dist = []
-               login_attempts_a2_s_o_dist = []
+               severe_alerts_a2_s_o_dist: List[List[float]] = []
+               warning_alerts_a2_s_o_dist: List[List[float]] = []
+               login_attempts_a2_s_o_dist: List[List[float]] = []
                for s in range(len(state_space.states)):
                    s_o_dist = []
-                   severe_alerts_s_o_dist = []
-                   warning_alerts_s_o_dist = []
-                   login_attempts_s_o_dist = []
+                   severe_alerts_s_o_dist: List[float] = []
+                   warning_alerts_s_o_dist: List[float] = []
+                   login_attempts_s_o_dist: List[float] = []
                    for o in range(len(observation_space_defender.observations)):
                        obs_vector = observation_space_defender.observation_id_to_observation_id_vector[o]
                        if s == 0:
@@ -256,8 +255,8 @@ class StoppingGameUtil:
                        s_o_dist.append(p)
                    a2_s_o_dist.append(s_o_dist)
                    severe_alerts_a2_s_o_dist.append(severe_alerts_s_o_dist)
-                   warning_alerts_a2_s_o_dist.append(warning_alerts_a2_s_o_dist)
-                   login_attempts_a2_s_o_dist.append(login_attempts_a2_s_o_dist)
+                   warning_alerts_a2_s_o_dist.append(warning_alerts_s_o_dist)
+                   login_attempts_a2_s_o_dist.append(login_attempts_s_o_dist)
                a1_a2_s_o_dist.append(a2_s_o_dist)
                severe_alerts_a1_a2_s_o_dist.append(severe_alerts_a2_s_o_dist)
                warning_alerts_a1_a2_s_o_dist.append(warning_alerts_a2_s_o_dist)
@@ -316,7 +315,7 @@ class StoppingGameUtil:
         return Z

     @staticmethod
-    def sample_next_state(T: np.ndarray, l: int, s: int, a1: int, a2: int, S: np.ndarray) -> int:
+    def sample_next_state(T: npt.NDArray[Any], l: int, s: int, a1: int, a2: int, S: npt.NDArray[np.int_]) -> int:
         """
         Samples the next state

@@ -331,22 +330,20 @@ class StoppingGameUtil:
         state_probs = []
         for s_prime in S:
             state_probs.append(T[l - 1][a1][a2][s][s_prime])
-        s_prime = np.random.choice(np.arange(0, len(S)), p=state_probs)
-        return s_prime
+        return int(np.random.choice(np.arange(0, len(S)), p=state_probs))

     @staticmethod
-    def sample_initial_state(b1: np.ndarray) -> int:
+    def sample_initial_state(b1: npt.NDArray[np.float_]) -> int:
         """
         Samples the initial state

         :param b1: the initial belief
         :return: s1
         """
-        s1 = np.random.choice(np.arange(0, len(b1)), p=b1)
-        return s1
+        return int(np.random.choice(np.arange(0, len(b1)), p=b1))

     @staticmethod
-    def sample_next_observation(Z: np.ndarray, s_prime: int, O: np.ndarray) -> int:
+    def sample_next_observation(Z: npt.NDArray[Any], s_prime: int, O: npt.NDArray[np.int_]) -> int:
         """
         Samples the next observation

@@ -366,7 +363,7 @@ class StoppingGameUtil:
         return int(o)

     @staticmethod
-    def bayes_filter(s_prime: int, o: int, a1: int, b: np.ndarray, pi2: np.ndarray, l: int,
+    def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any], l: int,
                      config: StoppingGameConfig) -> float:
         """
         A Bayesian filter to compute the belief of player 1
@@ -404,7 +401,7 @@ class StoppingGameUtil:
         return b_prime_s_prime

     @staticmethod
-    def p_o_given_b_a1_a2(o: int, b: List, a1: int, a2: int, config: StoppingGameConfig) -> float:
+    def p_o_given_b_a1_a2(o: int, b: List[float], a1: int, a2: int, config: StoppingGameConfig) -> float:
         """
         Computes P[o|a,b]

@@ -423,8 +420,8 @@ class StoppingGameUtil:
         return prob

     @staticmethod
-    def next_belief(o: int, a1: int, b: np.ndarray, pi2: np.ndarray, config: StoppingGameConfig, l: int,
-                    a2: int = 0, s: int = 0) -> np.ndarray:
+    def next_belief(o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any],
+                    config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float_]:
         """
         Computes the next belief using a Bayesian filter

@@ -449,7 +446,7 @@ class StoppingGameUtil:
         return b_prime

     @staticmethod
-    def sample_attacker_action(pi2: np.ndarray, s: int) -> int:
+    def sample_attacker_action(pi2: npt.NDArray[Any], s: int) -> int:
         """
         Samples the attacker action

@@ -457,5 +454,4 @@ class StoppingGameUtil:
         :param s: the game state
         :return: a2 (the attacker action
         """
-        a2 = np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s])
-        return a2
+        return int(np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s]))
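
Note: the hunk at `@@ -256,8 +255,8 @@` above also fixes a real bug, not just typing: 0.2.19 appended `warning_alerts_a2_s_o_dist` and `login_attempts_a2_s_o_dist` to themselves instead of nesting the per-state lists, producing self-referential lists rather than the intended |S| x |O| structure. A standalone illustration:

    buggy: list = []
    buggy.append(buggy)        # 0.2.19 behaviour: the list now contains itself
    print(buggy[0] is buggy)   # True -- self-referential, not nested data

    inner = [0.1, 0.9]         # a per-state observation distribution
    fixed: list = []
    fixed.append(inner)        # 0.2.20 behaviour: the inner row is nested
    print(fixed)               # [[0.1, 0.9]]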

gym_csle_stopping_game-0.2.20.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-csle-stopping-game
-Version: 0.2.19
+Version: 0.2.20
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com
@@ -15,10 +15,10 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Intended Audience :: Science/Research
 Requires-Python: >=3.8
 Requires-Dist: gymnasium >=0.27.1
-Requires-Dist: csle-base >=0.2.19
-Requires-Dist: csle-common >=0.2.19
-Requires-Dist: csle-attacker >=0.2.19
-Requires-Dist: csle-defender >=0.2.19
+Requires-Dist: csle-base >=0.2.20
+Requires-Dist: csle-common >=0.2.20
+Requires-Dist: csle-attacker >=0.2.20
+Requires-Dist: csle-defender >=0.2.20
 Requires-Dist: csle-collector >=0.2.9
 Provides-Extra: testing
 Requires-Dist: pytest >=6.0 ; extra == 'testing'
@@ -26,6 +26,9 @@ Requires-Dist: pytest-cov >=2.0 ; extra == 'testing'
 Requires-Dist: pytest-mock >=3.6.0 ; extra == 'testing'
 Requires-Dist: pytest-grpc >=0.8.0 ; extra == 'testing'
 Requires-Dist: mypy >=1.4.1 ; extra == 'testing'
+Requires-Dist: mypy-extensions >=1.0.0 ; extra == 'testing'
+Requires-Dist: mypy-protobuf >=3.5.0 ; extra == 'testing'
+Requires-Dist: types-PyYAML >=6.0.12.11 ; extra == 'testing'
 Requires-Dist: types-paramiko >=3.2.0.0 ; extra == 'testing'
 Requires-Dist: types-protobuf >=4.23.0.3 ; extra == 'testing'
 Requires-Dist: types-requests >=2.31.0.1 ; extra == 'testing'

gym_csle_stopping_game-0.2.20.dist-info/RECORD
@@ -0,0 +1,19 @@
+gym_csle_stopping_game/__init__.py,sha256=ooy6TjxvBi1sZMEX3_mVlvfskqI5GqwITWzI882tfk0,657
+gym_csle_stopping_game/__version__.py,sha256=qMaQjrsi_F2wUsxjrQ7pmHBHD2rS1ibAIXrI1iCRn08,23
+gym_csle_stopping_game/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gym_csle_stopping_game/constants/constants.py,sha256=eIoD9eXifZ73kP-lSlvG-IXCpe4n6D-_aDygx0zOr5U,1030
+gym_csle_stopping_game/dao/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py,sha256=kmtrVa2CCVbi5xfd6uPWqMvhGmP8ccrtn1e_VmVvH7k,3494
+gym_csle_stopping_game/dao/stopping_game_config.py,sha256=5jFMvSWkJ0_PqlVZlAf2pzJFttHeUdUv_G4GeXIrdm0,5595
+gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py,sha256=3FfNi2-R6n1LqjA644EVq-v7wtp6sqyEkEdBN90-2n0,3753
+gym_csle_stopping_game/dao/stopping_game_state.py,sha256=Fyl19vIMeShYnHaixeJ-OHM4LHpJAswhBIp_5ytN3bM,2768
+gym_csle_stopping_game/envs/__init__.py,sha256=SQHaqXI0_2HYsC8i9swXEHDFcXKEYpb8GRP9l_S0Sw8,74
+gym_csle_stopping_game/envs/stopping_game_env.py,sha256=SY3vYUhtS42XIkzJfIhiJE-dKjSTRH7iiLpEQ7Id9P8,26178
+gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py,sha256=JGKp2B3s7Hf6z_nfOmDtv9LZo26i9HayTcVBq6gW6sk,9570
+gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py,sha256=JD7hGrBMYrNXh6A3JXFLvz4Op1ZWOZqlFEcDU4jS49k,8754
+gym_csle_stopping_game/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gym_csle_stopping_game/util/stopping_game_util.py,sha256=Kgt6o8nobpdlx0zoPxfoA8sH24KxMxQ5gh4Txt9wWgs,19160
+gym_csle_stopping_game-0.2.20.dist-info/METADATA,sha256=Ez_SGYA2X1QFWfE4arGr2LLHKQIliBQJ8RIdP804FyU,2055
+gym_csle_stopping_game-0.2.20.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
+gym_csle_stopping_game-0.2.20.dist-info/top_level.txt,sha256=3DBHkAEI00nq0aXZlJUkXJrLiwkcJCfaFoYcaOzEZUU,23
+gym_csle_stopping_game-0.2.20.dist-info/RECORD,,

gym_csle_stopping_game-0.2.19.dist-info/RECORD
@@ -1,19 +0,0 @@
-gym_csle_stopping_game/__init__.py,sha256=ooy6TjxvBi1sZMEX3_mVlvfskqI5GqwITWzI882tfk0,657
-gym_csle_stopping_game/__version__.py,sha256=8swO86HIyP6FWtTckB79yaNlkfWbV8msnhazFt9EOKQ,23
-gym_csle_stopping_game/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gym_csle_stopping_game/constants/constants.py,sha256=eIoD9eXifZ73kP-lSlvG-IXCpe4n6D-_aDygx0zOr5U,1030
-gym_csle_stopping_game/dao/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py,sha256=e8KwNZpZ_VswM57ZmjaGDeFTFButVCB8WUAHEctAWJk,3511
-gym_csle_stopping_game/dao/stopping_game_config.py,sha256=qWcfGLf7X6ymp8R_SLkXxNgH1AngIsGrhFXsB5ZnBxw,5448
-gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py,sha256=niK4cK_YoyFW6Wq-rFK_5hbLsDtlQ-UivcVf5BtGUrA,3770
-gym_csle_stopping_game/dao/stopping_game_state.py,sha256=tqlFJ9sjlNzkgKJPBbLKhi_HFEnuTJfFcZzs-idrf4w,2701
-gym_csle_stopping_game/envs/__init__.py,sha256=SQHaqXI0_2HYsC8i9swXEHDFcXKEYpb8GRP9l_S0Sw8,74
-gym_csle_stopping_game/envs/stopping_game_env.py,sha256=J0fL4z6cNhmXiwSXtaU9wbrAvVBeDuW2bJQ0YWAqGMs,25889
-gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py,sha256=wsOZRDgktz5ENvmhQI8DLCLIoN2JhmY4eoLyX0X0zsA,9060
-gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py,sha256=IvY4gwg3Mz2hNBTuTToCbPLA8Zp1KRWc_P9H5KXVvug,8636
-gym_csle_stopping_game/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gym_csle_stopping_game/util/stopping_game_util.py,sha256=RaR-onJtmuTk15B2m4KUYTbXNElKBw2iEkoP6m0n1b0,18912
-gym_csle_stopping_game-0.2.19.dist-info/METADATA,sha256=NJe-ly9n_tOtOTMS0AHj7OwBAsGuVL2Qxu5bAl08wUA,1876
-gym_csle_stopping_game-0.2.19.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
-gym_csle_stopping_game-0.2.19.dist-info/top_level.txt,sha256=3DBHkAEI00nq0aXZlJUkXJrLiwkcJCfaFoYcaOzEZUU,23
-gym_csle_stopping_game-0.2.19.dist-info/RECORD,,