gym-csle-stopping-game 0.2.18-py3-none-any.whl → 0.2.20-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gym-csle-stopping-game might be problematic.

@@ -1 +1 @@
- __version__ = '0.2.18'
+ __version__ = '0.2.20'
@@ -37,7 +36,6 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
  :param d: the dict to convert
  :return: the created instance
  """
- defender_strategy = None
  try:
  defender_strategy = MultiThresholdStoppingPolicy.from_dict(d["defender_strategy"])
  except Exception:
@@ -54,9 +53,11 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
 
  def to_dict(self) -> Dict[str, Any]:
  """
+ Converts the object to a dict representation
+
  :return: a dict representation of the object
  """
- d = {}
+ d: Dict[str, Any] = {}
  d["stopping_game_config"] = self.stopping_game_config.to_dict()
  d["defender_strategy"] = self.defender_strategy.to_dict()
  d["stopping_game_name"] = self.stopping_game_name
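The recurring change from d = {} to d: Dict[str, Any] = {} is the usual way to make an empty dict literal pass strict mypy checking. A minimal sketch of the pattern (illustrative only, not code from the package):

    from typing import Any, Dict

    def to_dict_sketch(stopping_game_name: str) -> Dict[str, Any]:
        # Under strict settings mypy reports "Need type annotation" for a bare d = {},
        # so the dict is annotated explicitly before it is populated.
        d: Dict[str, Any] = {}
        d["stopping_game_name"] = stopping_game_name
        return d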
@@ -1,6 +1,7 @@
  from typing import Dict, Any
  import gymnasium as gym
  import numpy as np
+ import numpy.typing as npt
  from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig
 
 
@@ -10,8 +11,10 @@ class StoppingGameConfig(SimulationEnvInputConfig):
  """
 
  def __init__(self, env_name: str,
- T: np.ndarray, O: np.ndarray, Z: np.ndarray, R: np.ndarray, S: np.ndarray, A1: np.ndarray,
- A2: np.ndarray, L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int, b1: np.ndarray,
+ T: npt.NDArray[Any], O: npt.NDArray[np.int_], Z: npt.NDArray[Any],
+ R: npt.NDArray[Any], S: npt.NDArray[np.int_], A1: npt.NDArray[np.int_],
+ A2: npt.NDArray[np.int_], L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int,
+ b1: npt.NDArray[np.float_],
  save_dir: str, checkpoint_traces_freq: int, gamma: float = 1) -> None:
  """
  Initializes the DTO
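For readers unfamiliar with the npt.NDArray annotations introduced throughout this release, a minimal sketch of the numpy.typing pattern (hypothetical function names, not code from the package):

    from typing import Any

    import numpy as np
    import numpy.typing as npt

    def initial_belief() -> npt.NDArray[np.float_]:
        # npt.NDArray[np.float_] means "an ndarray with float elements";
        # npt.NDArray[Any] leaves the element type unconstrained.
        return np.array([1.0, 0.0, 0.0])

    def scale_tensor(T: npt.NDArray[Any], factor: float) -> npt.NDArray[Any]:
        # Arithmetic on an NDArray[Any] still type-checks; the annotation only
        # documents that a numpy array (of any dtype) flows through.
        return T * factor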
@@ -55,9 +58,11 @@ class StoppingGameConfig(SimulationEnvInputConfig):
 
  def to_dict(self) -> Dict[str, Any]:
  """
+ Converts the object to a dict representation
+
  :return: a dict representation of the object
  """
- d = {}
+ d: Dict[str, Any] = {}
  d["T"] = list(self.T.tolist())
  d["O"] = list(self.O.tolist())
  d["Z"] = list(self.Z.tolist())
@@ -38,7 +38,6 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
  :param d: the dict to convert
  :return: the created instance
  """
- attacker_strategy = None
  try:
  attacker_strategy = MultiThresholdStoppingPolicy.from_dict(d["attacker_strategy"])
  except Exception:
@@ -59,9 +58,11 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
 
  def to_dict(self) -> Dict[str, Any]:
  """
+ Converts the object to a dict representation
+
  :return: a dict representation of the object
  """
- d = {}
+ d: Dict[str, Any] = {}
  d["stopping_game_config"] = self.stopping_game_config.to_dict()
  d["attacker_strategy"] = self.attacker_strategy.to_dict()
  d["stopping_game_name"] = self.stopping_game_name
@@ -1,5 +1,6 @@
  from typing import Dict, Any
  import numpy as np
+ import numpy.typing as npt
  from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
  from csle_base.json_serializable import JSONSerializable
 
@@ -9,7 +10,7 @@ class StoppingGameState(JSONSerializable):
  Represents the state of the optimal stopping game
  """
 
- def __init__(self, b1: np.ndarray, L: int) -> None:
+ def __init__(self, b1: npt.NDArray[np.float_], L: int) -> None:
  """
  Intializes the state
 
@@ -34,13 +35,13 @@ class StoppingGameState(JSONSerializable):
  self.s = StoppingGameUtil.sample_initial_state(b1=self.b1)
  self.b = self.b1.copy()
 
- def attacker_observation(self) -> np.ndarray:
+ def attacker_observation(self) -> npt.NDArray[Any]:
  """
  :return: the attacker's observation
  """
  return np.array([self.l, self.b[1], self.s])
 
- def defender_observation(self) -> np.ndarray:
+ def defender_observation(self) -> npt.NDArray[Any]:
  """
  :return: the defender's observation
  """
@@ -69,9 +70,11 @@ class StoppingGameState(JSONSerializable):
 
  def to_dict(self) -> Dict[str, Any]:
  """
+ Converts the object to a dict representation
+
  :return: a dict representation of the object
  """
- d = {}
+ d: Dict[str, Any] = {}
  d["L"] = self.L
  d["b1"] = list(self.b1)
  d["b"] = list(self.b)
@@ -1,5 +1,6 @@
- from typing import Tuple, Dict, Union, List, Any
+ from typing import Tuple, Dict, List, Any
  import numpy as np
+ import numpy.typing as npt
  import time
  import math
  import csle_common.constants.constants as constants
@@ -32,6 +33,11 @@ class StoppingGameEnv(BaseEnv):
  """
 
  def __init__(self, config: StoppingGameConfig):
+ """
+ Initializes the environment
+
+ :param config: the environment configuration
+ """
  self.config = config
 
  # Initialize environment state
@@ -54,15 +60,15 @@ class StoppingGameEnv(BaseEnv):
  }
 
  # Setup traces
- self.traces = []
+ self.traces: List[SimulationTrace] = []
  self.trace = SimulationTrace(simulation_env=self.config.env_name)
 
  # Reset
  self.reset()
  super().__init__()
 
- def step(self, action_profile: Tuple[int, Tuple[np.ndarray, int]]) \
- -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, bool, dict]:
+ def step(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]]) \
+ -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, bool, Dict[str, Any]]:
  """
  Takes a step in the environment by executing the given action
 
@@ -76,7 +82,7 @@ class StoppingGameEnv(BaseEnv):
  assert pi2.shape[0] == len(self.config.S)
  assert pi2.shape[1] == len(self.config.A1)
  done = False
- info = {}
+ info: Dict[str, Any] = {}
 
  # Compute r, s', b',o'
  r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -129,8 +135,8 @@ class StoppingGameEnv(BaseEnv):
 
  return (defender_obs, attacker_obs), (r, -r), done, done, info
 
- def step_test(self, action_profile: Tuple[int, Tuple[np.ndarray, int]], sample_Z) \
- -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, dict]:
+ def step_test(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]], sample_Z) \
+ -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
  """
  Takes a step in the environment by executing the given action
 
@@ -144,7 +150,7 @@ class StoppingGameEnv(BaseEnv):
  assert pi2.shape[0] == len(self.config.S)
  assert pi2.shape[1] == len(self.config.A1)
  done = False
- info = {}
+ info: Dict[str, Any] = {}
 
  # Compute r, s', b',o'
  r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -196,10 +202,18 @@ class StoppingGameEnv(BaseEnv):
 
  return (defender_obs, attacker_obs), (r, -r), done, info
 
- def step_trace(self, trace: EmulationTrace, a1: int, pi2: np.ndarray) \
- -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[int, int], bool, dict]:
+ def step_trace(self, trace: EmulationTrace, a1: int, pi2: npt.NDArray[Any]) \
+ -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
+ """
+ Utility function for stepping a given trace
+
+ :param trace: the trace to step
+ :param a1: the action to step with
+ :param pi2: the policy of the attacker
+ :return: the result of the step
+ """
  done = False
- info = {}
+ info: Dict[str, Any] = {}
  if (self.state.t - 1) < len(trace.attacker_actions):
  a2_emulation_action = trace.attacker_actions[self.state.t - 1]
  a2 = 0
@@ -259,13 +273,16 @@ class StoppingGameEnv(BaseEnv):
  if not done:
  self.trace.attacker_observations.append(attacker_obs)
  self.trace.defender_observations.append(defender_obs)
-
- # Populate info
  info = self._info(info)
-
  return (defender_obs, attacker_obs), (r, -r), done, info
 
  def mean(self, prob_vector):
+ """
+ Utility function for getting the mean of a vector
+
+ :param prob_vector: the vector to take the mean of
+ :return: the mean
+ """
  m = 0
  for i in range(len(prob_vector)):
  m += prob_vector[i] * i
@@ -284,9 +301,10 @@ class StoppingGameEnv(BaseEnv):
  else:
  return 1 - (min(10, (first_stop - (intrusion_start + 1))) / 2) / 10
 
- def _info(self, info) -> Dict[str, Union[float, int]]:
+ def _info(self, info: Dict[str, Any]) -> Dict[str, Any]:
  """
  Adds the cumulative reward and episode length to the info dict
+
  :param info: the info dict to update
  :return: the updated info dict
  """
@@ -346,7 +364,8 @@ class StoppingGameEnv(BaseEnv):
  defender_baseline_stop_on_first_alert_return
  return info
 
- def reset(self, seed: int = 0, soft: bool = False) -> Tuple[Tuple[np.ndarray, np.ndarray], Dict[str, Any]]:
+ def reset(self, seed: int = 0, soft: bool = False) \
+ -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Dict[str, Any]]:
  """
  Resets the environment state, this should be called whenever step() returns <done>
 
@@ -361,7 +380,7 @@ class StoppingGameEnv(BaseEnv):
  defender_obs = self.state.defender_observation()
  self.trace.attacker_observations.append(attacker_obs)
  self.trace.defender_observations.append(defender_obs)
- info = {}
+ info: Dict[str, Any] = {}
  return (defender_obs, attacker_obs), info
 
  @staticmethod
@@ -371,6 +390,18 @@ class StoppingGameEnv(BaseEnv):
  emulation_env_config: EmulationEnvConfig,
  simulation_env_config: SimulationEnvConfig
  ) -> List[EmulationSimulationTrace]:
+ """
+ Utility function for evaluating a strategy profile in the emulation environment
+
+ :param env: the environment to use for evaluation
+ :param n_episodes: the number of evaluation episodes
+ :param intrusion_seq: the intrusion sequence for the evaluation (sequence of attacker actions)
+ :param defender_policy: the defender policy for the evaluation
+ :param attacker_policy: the attacker policy for the evaluation
+ :param emulation_env_config: configuration of the emulation environment for the evaluation
+ :param simulation_env_config: configuration of the simulation environment for the evaluation
+ :return: traces with the evaluation results
+ """
  logger = Logger.__call__().get_logger()
  traces = []
  s = EmulationEnvState(emulation_env_config=emulation_env_config)
@@ -379,7 +410,7 @@ class StoppingGameEnv(BaseEnv):
  done = False
  defender_obs_space = simulation_env_config.joint_observation_space_config.observation_spaces[0]
  b = env.state.b1
- o = env.reset()
+ o, _ = env.reset()
  (d_obs, a_obs) = o
  t = 0
  s.reset()
@@ -390,7 +421,7 @@ class StoppingGameEnv(BaseEnv):
  while not done:
  a1 = defender_policy.action(d_obs)
  a2 = attacker_policy.action(a_obs)
- o, r, done, info = env.step((a1, a2))
+ o, r, done, info, _ = env.step((a1, a2))
  (d_obs, a_obs) = o
  r_1, r_2 = r
  logger.debug(f"a1:{a1}, a2:{a2}, d_obs:{d_obs}, a_obs:{a_obs}, r:{r}, done:{done}, info: {info}")
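The two changes above track the Gymnasium API, in which reset() returns (observation, info) and step() returns a five-tuple. A minimal sketch of the standard unpacking convention (generic Gymnasium usage, not code from the package):

    import gymnasium as gym

    def rollout(env: gym.Env, max_steps: int = 100) -> float:
        total_reward = 0.0
        obs, info = env.reset()  # reset() -> (observation, info)
        for _ in range(max_steps):
            action = env.action_space.sample()
            # step() -> (observation, reward, terminated, truncated, info)
            obs, reward, terminated, truncated, info = env.step(action)
            total_reward += float(reward)
            if terminated or truncated:
                break
        return total_reward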
@@ -419,12 +450,12 @@ class StoppingGameEnv(BaseEnv):
  f"{defender_obs_space.observation_id_to_observation_vector_inv}")
  logger.debug(f"observation_id_to_observation_vector_inv:"
  f"{o_components_str in defender_obs_space.observation_id_to_observation_vector_inv}")
+ emulation_o = 0
  if o_components_str in defender_obs_space.observation_id_to_observation_vector_inv:
- o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
- else:
- o = 0
- logger.debug(f"o:{o}")
- b = StoppingGameUtil.next_belief(o=o, a1=a1, b=b, pi2=a2, config=env.config, l=env.state.l, a2=a2)
+ emulation_o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
+ logger.debug(f"o:{emulation_o}")
+ b = StoppingGameUtil.next_belief(o=emulation_o, a1=a1, b=b, pi2=a2, config=env.config,
+ l=env.state.l, a2=a2)
  d_obs[1] = b[1]
  a_obs[1] = b[1]
  logger.debug(f"b:{b}")
@@ -435,7 +466,7 @@ class StoppingGameEnv(BaseEnv):
  simulation_trace.infos.append(info)
  simulation_trace.states.append(s)
  simulation_trace.beliefs.append(b[1])
- simulation_trace.infrastructure_metrics.append(o)
+ simulation_trace.infrastructure_metrics.append(emulation_o)
 
  em_sim_trace = EmulationSimulationTrace(emulation_trace=emulation_trace, simulation_trace=simulation_trace)
  MetastoreFacade.save_emulation_simulation_trace(em_sim_trace)
@@ -527,10 +558,10 @@ class StoppingGameEnv(BaseEnv):
  stage_policy = []
  for s in self.config.S:
  if s != 2:
- dist = [0, 0]
- dist[a2] = 1
+ dist = [0.0, 0.0]
+ dist[a2] = 1.0
  stage_policy.append(dist)
  else:
  stage_policy.append([0.5, 0.5])
- stage_policy = np.array(stage_policy)
- _, _, done, _ = self.step(action_profile=(a1, (stage_policy, a2)))
+ pi2 = np.array(stage_policy)
+ _, _, done, _, _ = self.step(action_profile=(a1, (pi2, a2)))
@@ -1,6 +1,6 @@
  from typing import Tuple, List, Union, Dict, Any
- import gymnasium as gym
  import numpy as np
+ import numpy.typing as npt
  import torch
  import math
  from csle_common.dao.simulation_config.base_env import BaseEnv
@@ -9,6 +9,7 @@ from gym_csle_stopping_game.dao.stopping_game_attacker_mdp_config import Stoppin
  from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
  from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
  import gym_csle_stopping_game.constants.constants as env_constants
+ from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 
 
  class StoppingGameMdpAttackerEnv(BaseEnv):
@@ -23,7 +24,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  :param config: the configuration of the environment
  """
  self.config = config
- self.stopping_game_env = gym.make(self.config.stopping_game_name, config=self.config.stopping_game_config)
+ self.stopping_game_env: StoppingGameEnv = StoppingGameEnv(config=self.config.stopping_game_config)
 
  # Setup spaces
  self.observation_space = self.config.stopping_game_config.attacker_observation_space()
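The change above (mirrored in the defender environment further down) replaces the registry lookup via gym.make with direct construction of the inner environment. A rough sketch of the two approaches, assuming the environment has been registered under the id stored in stopping_game_name:

    import gymnasium as gym

    from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
    from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv

    def make_inner_env(config: StoppingGameConfig, registered_name: str, direct: bool = True):
        if direct:
            # Direct construction keeps the concrete StoppingGameEnv type visible
            # to mypy and skips the wrapper layers added by gym.make.
            return StoppingGameEnv(config=config)
        # Registry lookup returns a wrapped env typed only as gym.Env.
        return gym.make(registered_name, config=config)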
@@ -33,41 +34,47 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  self.static_defender_strategy = self.config.defender_strategy
 
  # Setup Config
- self.viewer = None
+ self.viewer: Union[None, Any] = None
  self.metadata = {
  'render.modes': ['human', 'rgb_array'],
  'video.frames_per_second': 50 # Video rendering speed
  }
 
- self.latest_defender_obs = None
- self.latest_attacker_obs = None
- self.model = None
+ self.latest_defender_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+ self.latest_attacker_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+ self.model: Union[None, Any] = None
 
  # Reset
  self.reset()
  super().__init__()
 
- def step(self, pi2: Union[List[List[float]], int, float, np.int64, float, np.float64]) \
- -> Tuple[np.ndarray, int, bool, bool, dict]:
+ def step(self, pi2: Union[npt.NDArray[Any], int, float, np.int_, np.float_]) \
+ -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
  """
  Takes a step in the environment by executing the given action
 
  :param pi2: attacker stage policy
  :return: (obs, reward, terminated, truncated, info)
  """
- if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is float \
- or type(pi2) is np.float64:
+ if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is np.float64:
  a2 = pi2
- pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs, a2=a2)
+ if self.latest_attacker_obs is None:
+ raise ValueError("Attacker observation is None")
+ pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs), a2=int(a2))
  else:
  if self.model is not None:
- pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs)
+ if self.latest_attacker_obs is None:
+ raise ValueError("Attacker observation is None")
+ pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
  a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
  else:
  pi2 = np.array(pi2)
- if (not pi2.shape[0] == len(self.config.stopping_game_config.S)
- or pi2.shape[1] != len(self.config.stopping_game_config.A1)) and self.model is not None:
- pi2 = self.calculate_stage_policy(o=self.latest_attacker_obs)
+ try:
+ if self.latest_attacker_obs is None:
+ raise ValueError("Attacker observation is None")
+ pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
+ except Exception:
+ pass
  a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
 
  # a2 = pi2
@@ -83,7 +90,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  a1 = self.static_defender_strategy.action(o=self.latest_defender_obs)
 
  # Step the game
- o, r, d, _, info = self.stopping_game_env.step((a1, (pi2, a2)))
+ o, r, d, _, info = self.stopping_game_env.step((int(a1), (pi2, int(a2))))
  self.latest_defender_obs = o[0]
  self.latest_attacker_obs = o[1]
  attacker_obs = o[1]
@@ -94,7 +101,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
 
  return attacker_obs, r[1], d, d, info
 
- def reset(self, seed: int = 0, soft: bool = False) -> Tuple[np.ndarray, Dict[str, Any]]:
+ def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
  """
  Resets the environment state, this should be called whenever step() returns <done>
 
@@ -104,7 +111,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  self.latest_defender_obs = o[0]
  self.latest_attacker_obs = o[1]
  attacker_obs = o[1]
- info = {}
+ info: Dict[str, Any] = {}
  return attacker_obs, info
 
  def set_model(self, model) -> None:
@@ -116,7 +123,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  """
  self.model = model
 
- def calculate_stage_policy(self, o: List, a2: int = 0) -> np.ndarray:
+ def calculate_stage_policy(self, o: List[Any], a2: int = 0) -> npt.NDArray[Any]:
  """
  Calculates the stage policy of a given model and observation
 
@@ -127,15 +134,14 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  stage_policy = []
  for s in self.config.stopping_game_config.S:
  if s != 2:
- dist = [0, 0]
- dist[a2] = 1
+ dist = [0.0, 0.0]
+ dist[a2] = 1.0
  stage_policy.append(dist)
  else:
  stage_policy.append([0.5, 0.5])
  return np.array(stage_policy)
  if isinstance(self.model, MixedMultiThresholdStoppingPolicy):
- stage_policy = np.array(self.model.stage_policy(o=o))
- return stage_policy
+ return np.array(self.model.stage_policy(o=o))
  else:
  b1 = o[1]
  l = int(o[0])
@@ -146,18 +152,19 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  stage_policy.append(self._get_attacker_dist(obs=o))
  else:
  stage_policy.append([0.5, 0.5])
- stage_policy = np.array(stage_policy)
- return stage_policy
+ return np.array(stage_policy)
 
- def _get_attacker_dist(self, obs: List) -> List:
+ def _get_attacker_dist(self, obs: List[Any]) -> List[float]:
  """
  Utility function for getting the attacker's action distribution based on a given observation
 
  :param obs: the given observation
  :return: the action distribution
  """
- obs = np.array([obs])
- actions, values, log_prob = self.model.policy.forward(obs=torch.tensor(obs).to(self.model.device))
+ np_obs = np.array([obs])
+ if self.model is None:
+ raise ValueError("Model is None")
+ actions, values, log_prob = self.model.policy.forward(obs=torch.tensor(np_obs).to(self.model.device))
  action = actions[0]
  if action == 1:
  stop_prob = math.exp(log_prob)
@@ -211,7 +218,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  Closes the viewer (cleanup)
  :return: None
  """
- if self.viewer:
+ if self.viewer is not None:
  self.viewer.close()
  self.viewer = None
 
@@ -244,4 +251,4 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
  self.reset()
  else:
  action_idx = int(raw_input)
- _, _, done, _ = self.step(pi2=action_idx)
+ _, _, done, _, _ = self.step(pi2=action_idx)
@@ -1,6 +1,6 @@
- from typing import Tuple, List, Dict, Any
- import gymnasium as gym
+ from typing import Tuple, List, Dict, Any, Union
  import numpy as np
+ import numpy.typing as npt
  from csle_common.dao.simulation_config.base_env import BaseEnv
  from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
  from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
@@ -27,7 +27,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
  :param attacker_strategy: the strategy of the static attacker
  """
  self.config = config
- self.stopping_game_env = gym.make(self.config.stopping_game_name, config=self.config.stopping_game_config)
+ self.stopping_game_env = StoppingGameEnv(config=self.config.stopping_game_config)
 
  # Setup spaces
  self.observation_space = self.config.stopping_game_config.defender_observation_space()
@@ -37,18 +37,18 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
  self.static_attacker_strategy = self.config.attacker_strategy
 
  # Setup Config
- self.viewer = None
+ self.viewer: Union[None, Any] = None
  self.metadata = {
  'render.modes': ['human', 'rgb_array'],
  'video.frames_per_second': 50 # Video rendering speed
  }
 
- self.latest_attacker_obs = None
+ self.latest_attacker_obs: Union[None, npt.NDArray[Any]] = None
  # Reset
  self.reset()
  super().__init__()
 
- def step(self, a1: int) -> Tuple[np.ndarray, int, bool, bool, dict]:
+ def step(self, a1: int) -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
  """
  Takes a step in the environment by executing the given action
 
@@ -66,7 +66,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
 
  return defender_obs, r[0], d, d, info
 
- def step_test(self, a1: int, sample_Z) -> Tuple[np.ndarray, int, bool, dict]:
+ def step_test(self, a1: int, sample_Z) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
  """
  Takes a step in the environment by executing the given action
 
@@ -84,7 +84,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
 
  return defender_obs, r[0], d, info
 
- def reset(self, seed: int = 0, soft: bool = False) -> Tuple[np.ndarray, Dict[str, Any]]:
+ def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
  """
  Resets the environment state, this should be called whenever step() returns <done>
 
@@ -93,7 +93,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
  o, _ = self.stopping_game_env.reset()
  self.latest_attacker_obs = o[1]
  defender_obs = o[0]
- dict = {}
+ dict: Dict[str, Any] = {}
  return defender_obs, dict
 
  def render(self, mode: str = 'human'):
@@ -105,7 +105,14 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
  """
  raise NotImplementedError("Rendering is not implemented for this environment")
 
- def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[np.ndarray, int, bool, dict]:
+ def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
+ """
+ Utility method for stopping a pre-recorded trace
+
+ :param trace: the trace to step
+ :param a1: the action to step with
+ :return: the result of the step according to the trace
+ """
  pi2 = np.array(self.static_attacker_strategy.stage_policy(self.latest_attacker_obs))
  o, r, d, info = self.stopping_game_env.step_trace(trace=trace, a1=a1, pi2=pi2)
  self.latest_attacker_obs = o[1]
@@ -118,6 +125,17 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
  defender_policy: Policy,
  emulation_env_config: EmulationEnvConfig, simulation_env_config: SimulationEnvConfig) \
  -> List[EmulationSimulationTrace]:
+ """
+ Utility function for evaluating policies in the emulation environment
+
+ :param env: the environment to use for evaluation
+ :param n_episodes: the number of episodes to use for evaluation
+ :param intrusion_seq: the sequence of intrusion actions to use for evaluation
+ :param defender_policy: the defender policy to use for evaluation
+ :param emulation_env_config: the configuration of the emulation environment to use for evaluation
+ :param simulation_env_config: the configuration of the simulation environment to use for evaluation
+ :return: traces with the evaluation results
+ """
  return StoppingGameEnv.emulation_evaluation(
  env=env.stopping_game_env, n_episodes=n_episodes, intrusion_seq=intrusion_seq,
  defender_policy=defender_policy, attacker_policy=env.static_attacker_strategy,
@@ -160,7 +178,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
  Closes the viewer (cleanup)
  :return: None
  """
- if self.viewer:
+ if self.viewer is not None:
  self.viewer.close()
  self.viewer = None
 
@@ -193,4 +211,4 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
  self.reset()
  else:
  action_idx = int(raw_input)
- _, _, done, _ = self.step(pi2=action_idx)
+ _, _, done, _, _ = self.step(a1=action_idx)
@@ -1,5 +1,6 @@
- from typing import List, Dict, Tuple
+ from typing import List, Dict, Tuple, Any
  import numpy as np
+ import numpy.typing as npt
  from scipy.stats import betabinom
  from csle_common.dao.system_identification.emulation_statistics import EmulationStatistics
  from csle_common.dao.simulation_config.observation_space_config import ObservationSpaceConfig
@@ -14,26 +15,37 @@ class StoppingGameUtil:
  """
 
  @staticmethod
- def b1() -> np.ndarray:
+ def b1() -> npt.NDArray[np.int_]:
  """
+ Gets the initial belief
+
  :return: the initial belief
  """
  return np.array([1, 0, 0])
 
  @staticmethod
  def state_space():
+ """
+ Gets the state space
+
+ :return: the state space of the game
+ """
  return np.array([0, 1, 2])
 
  @staticmethod
- def defender_actions() -> np.ndarray:
+ def defender_actions() -> npt.NDArray[np.int_]:
  """
+ Gets the action space of the defender
+
  :return: the action space of the defender
  """
  return np.array([0, 1])
 
  @staticmethod
- def attacker_actions() -> np.ndarray:
+ def attacker_actions() -> npt.NDArray[np.int_]:
  """
+ Gets the action space of the attacker
+
  :return: the action space of the attacker
  """
  return np.array([0, 1])
@@ -44,13 +56,15 @@ class StoppingGameUtil:
  Returns the observation space of size n
 
  :param n: the maximum observation
- :return: O
+ :return: the observation space
  """
  return np.array(list(range(n + 1)))
 
  @staticmethod
- def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> np.ndarray:
+ def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> npt.NDArray[Any]:
  """
+ Gets the reward tensor
+
  :param R_SLA: the R_SLA constant
  :param R_INT: the R_INT constant
  :param R_COST: the R_COST constant
@@ -76,12 +90,13 @@ class StoppingGameUtil:
  ]
  ]
  R_l.append(R)
- R = np.array(R_l)
- return R
+ return np.array(R_l)
 
  @staticmethod
- def transition_tensor(L: int, p: float) -> np.ndarray:
+ def transition_tensor(L: int, p: float) -> npt.NDArray[Any]:
  """
+ Gets the transition tensor
+
  :param L: the maximum number of stop actions
  :return: a |L|x|A1|x|A2||S|^2 tensor
  """
@@ -156,15 +171,14 @@ class StoppingGameUtil:
  ]
  ]
  T_l.append(T)
- T = np.array(T_l)
- return T
+ return np.array(T_l)
 
  @staticmethod
  def observation_tensor_from_emulation_statistics(emulation_statistic: EmulationStatistics,
  observation_space_defender: ObservationSpaceConfig,
  joint_action_space: JointActionSpaceConfig,
  state_space: StateSpaceConfig) \
- -> Tuple[np.ndarray, Dict[str, List]]:
+ -> Tuple[npt.NDArray[Any], Dict[str, List[Any]]]:
  """
  Returns an observation tensor based on measured emulation statistics
 
@@ -174,9 +188,9 @@ class StoppingGameUtil:
  :param state_space: the state space
  :return: a |A1|x|A2|x|S|x|O| tensor
  """
- intrusion_severe_alerts_probabilities = []
- intrusion_warning_alerts_probabilities = []
- intrusion_login_attempts_probabilities = []
+ intrusion_severe_alerts_probabilities: List[float] = []
+ intrusion_warning_alerts_probabilities: List[float] = []
+ intrusion_login_attempts_probabilities: List[float] = []
  norm = sum(emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"].values())
  for severe_alert_obs in observation_space_defender.component_observations["severe_alerts"]:
  count = emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"][severe_alert_obs.id]
@@ -214,14 +228,14 @@ class StoppingGameUtil:
  login_attempts_a1_a2_s_o_dist = []
  for a2 in range(len(joint_action_space.action_spaces[1].actions)):
  a2_s_o_dist = []
- severe_alerts_a2_s_o_dist = []
- warning_alerts_a2_s_o_dist = []
- login_attempts_a2_s_o_dist = []
+ severe_alerts_a2_s_o_dist: List[List[float]] = []
+ warning_alerts_a2_s_o_dist: List[List[float]] = []
+ login_attempts_a2_s_o_dist: List[List[float]] = []
  for s in range(len(state_space.states)):
  s_o_dist = []
- severe_alerts_s_o_dist = []
- warning_alerts_s_o_dist = []
- login_attempts_s_o_dist = []
+ severe_alerts_s_o_dist: List[float] = []
+ warning_alerts_s_o_dist: List[float] = []
+ login_attempts_s_o_dist: List[float] = []
  for o in range(len(observation_space_defender.observations)):
  obs_vector = observation_space_defender.observation_id_to_observation_id_vector[o]
  if s == 0:
@@ -241,8 +255,8 @@ class StoppingGameUtil:
  s_o_dist.append(p)
  a2_s_o_dist.append(s_o_dist)
  severe_alerts_a2_s_o_dist.append(severe_alerts_s_o_dist)
- warning_alerts_a2_s_o_dist.append(warning_alerts_a2_s_o_dist)
- login_attempts_a2_s_o_dist.append(login_attempts_a2_s_o_dist)
+ warning_alerts_a2_s_o_dist.append(warning_alerts_s_o_dist)
+ login_attempts_a2_s_o_dist.append(login_attempts_s_o_dist)
  a1_a2_s_o_dist.append(a2_s_o_dist)
  severe_alerts_a1_a2_s_o_dist.append(severe_alerts_a2_s_o_dist)
  warning_alerts_a1_a2_s_o_dist.append(warning_alerts_a2_s_o_dist)
@@ -301,7 +315,7 @@ class StoppingGameUtil:
  return Z
 
  @staticmethod
- def sample_next_state(T: np.ndarray, l: int, s: int, a1: int, a2: int, S: np.ndarray) -> int:
+ def sample_next_state(T: npt.NDArray[Any], l: int, s: int, a1: int, a2: int, S: npt.NDArray[np.int_]) -> int:
  """
  Samples the next state
 
@@ -316,22 +330,20 @@ class StoppingGameUtil:
  state_probs = []
  for s_prime in S:
  state_probs.append(T[l - 1][a1][a2][s][s_prime])
- s_prime = np.random.choice(np.arange(0, len(S)), p=state_probs)
- return s_prime
+ return int(np.random.choice(np.arange(0, len(S)), p=state_probs))
 
  @staticmethod
- def sample_initial_state(b1: np.ndarray) -> int:
+ def sample_initial_state(b1: npt.NDArray[np.float_]) -> int:
  """
  Samples the initial state
 
  :param b1: the initial belief
  :return: s1
  """
- s1 = np.random.choice(np.arange(0, len(b1)), p=b1)
- return s1
+ return int(np.random.choice(np.arange(0, len(b1)), p=b1))
 
  @staticmethod
- def sample_next_observation(Z: np.ndarray, s_prime: int, O: np.ndarray) -> int:
+ def sample_next_observation(Z: npt.NDArray[Any], s_prime: int, O: npt.NDArray[np.int_]) -> int:
  """
  Samples the next observation
 
@@ -351,7 +363,7 @@ class StoppingGameUtil:
  return int(o)
 
  @staticmethod
- def bayes_filter(s_prime: int, o: int, a1: int, b: np.ndarray, pi2: np.ndarray, l: int,
+ def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any], l: int,
  config: StoppingGameConfig) -> float:
  """
  A Bayesian filter to compute the belief of player 1
@@ -389,7 +401,7 @@ class StoppingGameUtil:
  return b_prime_s_prime
 
  @staticmethod
- def p_o_given_b_a1_a2(o: int, b: List, a1: int, a2: int, config: StoppingGameConfig) -> float:
+ def p_o_given_b_a1_a2(o: int, b: List[float], a1: int, a2: int, config: StoppingGameConfig) -> float:
  """
  Computes P[o|a,b]
 
@@ -408,8 +420,8 @@ class StoppingGameUtil:
  return prob
 
  @staticmethod
- def next_belief(o: int, a1: int, b: np.ndarray, pi2: np.ndarray, config: StoppingGameConfig, l: int,
- a2: int = 0, s: int = 0) -> np.ndarray:
+ def next_belief(o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any],
+ config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float_]:
  """
  Computes the next belief using a Bayesian filter
 
@@ -434,7 +446,7 @@ class StoppingGameUtil:
  return b_prime
 
  @staticmethod
- def sample_attacker_action(pi2: np.ndarray, s: int) -> int:
+ def sample_attacker_action(pi2: npt.NDArray[Any], s: int) -> int:
  """
  Samples the attacker action
 
@@ -442,5 +454,4 @@ class StoppingGameUtil:
  :param s: the game state
  :return: a2 (the attacker action
  """
- a2 = np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s])
- return a2
+ return int(np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s]))
@@ -0,0 +1,44 @@
+ Metadata-Version: 2.1
+ Name: gym-csle-stopping-game
+ Version: 0.2.20
+ Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
+ Author: Kim Hammar
+ Author-email: hammar.kim@gmail.com
+ License: Creative Commons Attribution-ShareAlike 4.0 International
+ Keywords: Reinforcement-Learning Cyber-Security Markov-Games Markov-Decision-Processes
+ Platform: unix
+ Platform: linux
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Intended Audience :: Science/Research
+ Requires-Python: >=3.8
+ Requires-Dist: gymnasium >=0.27.1
+ Requires-Dist: csle-base >=0.2.20
+ Requires-Dist: csle-common >=0.2.20
+ Requires-Dist: csle-attacker >=0.2.20
+ Requires-Dist: csle-defender >=0.2.20
+ Requires-Dist: csle-collector >=0.2.9
+ Provides-Extra: testing
+ Requires-Dist: pytest >=6.0 ; extra == 'testing'
+ Requires-Dist: pytest-cov >=2.0 ; extra == 'testing'
+ Requires-Dist: pytest-mock >=3.6.0 ; extra == 'testing'
+ Requires-Dist: pytest-grpc >=0.8.0 ; extra == 'testing'
+ Requires-Dist: mypy >=1.4.1 ; extra == 'testing'
+ Requires-Dist: mypy-extensions >=1.0.0 ; extra == 'testing'
+ Requires-Dist: mypy-protobuf >=3.5.0 ; extra == 'testing'
+ Requires-Dist: types-PyYAML >=6.0.12.11 ; extra == 'testing'
+ Requires-Dist: types-paramiko >=3.2.0.0 ; extra == 'testing'
+ Requires-Dist: types-protobuf >=4.23.0.3 ; extra == 'testing'
+ Requires-Dist: types-requests >=2.31.0.1 ; extra == 'testing'
+ Requires-Dist: types-urllib3 >=1.26.25.13 ; extra == 'testing'
+ Requires-Dist: flake8 >=3.9 ; extra == 'testing'
+ Requires-Dist: flake8-rst-docstrings >=0.3.0 ; extra == 'testing'
+ Requires-Dist: tox >=3.24 ; extra == 'testing'
+ Requires-Dist: sphinx >=5.3.0 ; extra == 'testing'
+ Requires-Dist: sphinxcontrib-napoleon >=0.7 ; extra == 'testing'
+ Requires-Dist: sphinx-rtd-theme >=1.1.1 ; extra == 'testing'
+ Requires-Dist: twine >=4.0.2 ; extra == 'testing'
+ Requires-Dist: build >=0.10.0 ; extra == 'testing'
+
@@ -0,0 +1,19 @@
+ gym_csle_stopping_game/__init__.py,sha256=ooy6TjxvBi1sZMEX3_mVlvfskqI5GqwITWzI882tfk0,657
+ gym_csle_stopping_game/__version__.py,sha256=qMaQjrsi_F2wUsxjrQ7pmHBHD2rS1ibAIXrI1iCRn08,23
+ gym_csle_stopping_game/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gym_csle_stopping_game/constants/constants.py,sha256=eIoD9eXifZ73kP-lSlvG-IXCpe4n6D-_aDygx0zOr5U,1030
+ gym_csle_stopping_game/dao/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py,sha256=kmtrVa2CCVbi5xfd6uPWqMvhGmP8ccrtn1e_VmVvH7k,3494
+ gym_csle_stopping_game/dao/stopping_game_config.py,sha256=5jFMvSWkJ0_PqlVZlAf2pzJFttHeUdUv_G4GeXIrdm0,5595
+ gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py,sha256=3FfNi2-R6n1LqjA644EVq-v7wtp6sqyEkEdBN90-2n0,3753
+ gym_csle_stopping_game/dao/stopping_game_state.py,sha256=Fyl19vIMeShYnHaixeJ-OHM4LHpJAswhBIp_5ytN3bM,2768
+ gym_csle_stopping_game/envs/__init__.py,sha256=SQHaqXI0_2HYsC8i9swXEHDFcXKEYpb8GRP9l_S0Sw8,74
+ gym_csle_stopping_game/envs/stopping_game_env.py,sha256=SY3vYUhtS42XIkzJfIhiJE-dKjSTRH7iiLpEQ7Id9P8,26178
+ gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py,sha256=JGKp2B3s7Hf6z_nfOmDtv9LZo26i9HayTcVBq6gW6sk,9570
+ gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py,sha256=JD7hGrBMYrNXh6A3JXFLvz4Op1ZWOZqlFEcDU4jS49k,8754
+ gym_csle_stopping_game/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gym_csle_stopping_game/util/stopping_game_util.py,sha256=Kgt6o8nobpdlx0zoPxfoA8sH24KxMxQ5gh4Txt9wWgs,19160
+ gym_csle_stopping_game-0.2.20.dist-info/METADATA,sha256=Ez_SGYA2X1QFWfE4arGr2LLHKQIliBQJ8RIdP804FyU,2055
+ gym_csle_stopping_game-0.2.20.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
+ gym_csle_stopping_game-0.2.20.dist-info/top_level.txt,sha256=3DBHkAEI00nq0aXZlJUkXJrLiwkcJCfaFoYcaOzEZUU,23
+ gym_csle_stopping_game-0.2.20.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.40.0)
+ Generator: bdist_wheel (0.41.1)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
@@ -1,38 +0,0 @@
- Metadata-Version: 2.1
- Name: gym-csle-stopping-game
- Version: 0.2.18
- Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
- Author: Kim Hammar
- Author-email: hammar.kim@gmail.com
- License: Creative Commons Attribution-ShareAlike 4.0 International
- Keywords: Reinforcement-Learning Cyber-Security Markov-Games Markov-Decision-Processes
- Platform: unix
- Platform: linux
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3 :: Only
- Classifier: Programming Language :: Python :: 3.8
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Intended Audience :: Science/Research
- Requires-Python: >=3.8
- Requires-Dist: gymnasium (>=0.27.1)
- Requires-Dist: csle-base (>=0.2.18)
- Requires-Dist: csle-common (>=0.2.18)
- Requires-Dist: csle-attacker (>=0.2.18)
- Requires-Dist: csle-defender (>=0.2.18)
- Requires-Dist: csle-collector (>=0.2.9)
- Provides-Extra: testing
- Requires-Dist: pytest (>=6.0) ; extra == 'testing'
- Requires-Dist: pytest-cov (>=2.0) ; extra == 'testing'
- Requires-Dist: pytest-mock (>=3.6.0) ; extra == 'testing'
- Requires-Dist: pytest-grpc (>=0.8.0) ; extra == 'testing'
- Requires-Dist: mypy (>=1.3.0) ; extra == 'testing'
- Requires-Dist: flake8 (>=3.9) ; extra == 'testing'
- Requires-Dist: tox (>=3.24) ; extra == 'testing'
- Requires-Dist: sphinx (>=5.3.0) ; extra == 'testing'
- Requires-Dist: sphinxcontrib-napoleon (>=0.7) ; extra == 'testing'
- Requires-Dist: sphinx-rtd-theme (>=1.1.1) ; extra == 'testing'
- Requires-Dist: twine (>=4.0.2) ; extra == 'testing'
- Requires-Dist: build (>=0.10.0) ; extra == 'testing'
-
- UNKNOWN
-
@@ -1,19 +0,0 @@
- gym_csle_stopping_game/__init__.py,sha256=ooy6TjxvBi1sZMEX3_mVlvfskqI5GqwITWzI882tfk0,657
- gym_csle_stopping_game/__version__.py,sha256=FXd43n0EILiron_V2hjgc29zx5yUKaeqvkuBEJuCDkc,23
- gym_csle_stopping_game/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gym_csle_stopping_game/constants/constants.py,sha256=eIoD9eXifZ73kP-lSlvG-IXCpe4n6D-_aDygx0zOr5U,1030
- gym_csle_stopping_game/dao/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py,sha256=wHiPVKh1CHPDUEOyYOUSfYwjZugZ40Xy4gb_AfwKgaI,3457
- gym_csle_stopping_game/dao/stopping_game_config.py,sha256=592sb6qgVGDY2fqJ9pUZyJ9LygT_yu02X3obsYsWIPs,5394
- gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py,sha256=NAfGU3tcT_lgO0h0Ga_ZZPn7-moQT7O2_CmcentCH_M,3708
- gym_csle_stopping_game/dao/stopping_game_state.py,sha256=XUN_5TyZnKhe1XozDG4mvJxZQ1xSgbOtduNWE0e9mmE,2639
- gym_csle_stopping_game/envs/__init__.py,sha256=SQHaqXI0_2HYsC8i9swXEHDFcXKEYpb8GRP9l_S0Sw8,74
- gym_csle_stopping_game/envs/stopping_game_env.py,sha256=0kC_cEeuMafrZ3eH3862dweq-tcWvhzL7ignVN2OwZU,24656
- gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py,sha256=wsOZRDgktz5ENvmhQI8DLCLIoN2JhmY4eoLyX0X0zsA,9060
- gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py,sha256=7Bt_VVR0TFvUdVO67Th2CBV-UVo-6pICY5D-Eh1Lnu0,7749
- gym_csle_stopping_game/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gym_csle_stopping_game/util/stopping_game_util.py,sha256=T5Wqa1nmSdc-i_wfgAufQH3RxYHwtUFJsB94_RES-PU,18598
- gym_csle_stopping_game-0.2.18.dist-info/METADATA,sha256=7zlM9ezdSajKVxHYiQZ0fJdtj4ia3YpglJElXjieW04,1607
- gym_csle_stopping_game-0.2.18.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
- gym_csle_stopping_game-0.2.18.dist-info/top_level.txt,sha256=3DBHkAEI00nq0aXZlJUkXJrLiwkcJCfaFoYcaOzEZUU,23
- gym_csle_stopping_game-0.2.18.dist-info/RECORD,,