gym-csle-stopping-game 0.2.18.tar.gz → 0.2.20.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gym-csle-stopping-game might be problematic.
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/PKG-INFO +1 -4
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/pyproject.toml +1 -1
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/setup.cfg +18 -6
- gym_csle_stopping_game-0.2.20/src/gym_csle_stopping_game/__version__.py +1 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py +3 -2
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_config.py +8 -3
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py +3 -2
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_state.py +7 -4
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_env.py +60 -29
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py +37 -30
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py +30 -12
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/util/stopping_game_util.py +48 -37
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/PKG-INFO +1 -4
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/SOURCES.txt +2 -1
- gym_csle_stopping_game-0.2.20/src/gym_csle_stopping_game.egg-info/requires.txt +28 -0
- gym_csle_stopping_game-0.2.20/tests/test_stopping_game_util.py +19 -0
- gym_csle_stopping_game-0.2.18/src/gym_csle_stopping_game/__version__.py +0 -1
- gym_csle_stopping_game-0.2.18/src/gym_csle_stopping_game.egg-info/requires.txt +0 -20
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/setup.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/constants/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/constants/constants.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/util/__init__.py +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/dependency_links.txt +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/not-zip-safe +0 -0
- {gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/top_level.txt +0 -0
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym_csle_stopping_game
-Version: 0.2.18
+Version: 0.2.20
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com
@@ -15,6 +15,3 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Intended Audience :: Science/Research
 Requires-Python: >=3.8
 Provides-Extra: testing
-
-UNKNOWN
-
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/setup.cfg

@@ -20,10 +20,10 @@ classifiers =
 [options]
 install_requires =
     gymnasium>=0.27.1
-    csle-base>=0.2.18
-    csle-common>=0.2.18
-    csle-attacker>=0.2.18
-    csle-defender>=0.2.18
+    csle-base>=0.2.20
+    csle-common>=0.2.20
+    csle-attacker>=0.2.20
+    csle-defender>=0.2.20
     csle-collector>=0.2.9
 python_requires = >=3.8
 package_dir =
@@ -40,8 +40,16 @@ testing =
     pytest-cov>=2.0
     pytest-mock>=3.6.0
    pytest-grpc>=0.8.0
-    mypy>=1.3.0
+    mypy>=1.4.1
+    mypy-extensions>=1.0.0
+    mypy-protobuf>=3.5.0
+    types-PyYAML>=6.0.12.11
+    types-paramiko>=3.2.0.0
+    types-protobuf>=4.23.0.3
+    types-requests>=2.31.0.1
+    types-urllib3>=1.26.25.13
     flake8>=3.9
+    flake8-rst-docstrings>=0.3.0
     tox>=3.24
     sphinx>=5.3.0
     sphinxcontrib-napoleon>=0.7
@@ -54,8 +62,12 @@ gym_csle_stopping_game = py.typed
 
 [flake8]
 max-line-length = 120
-exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,*_pb2*,*init__
+exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,*_pb2*,*init__*,.tox
 ignore = E741, W503, W504, F821, W605
+rst-roles = class, func, ref
+rst-directives = envvar, exception
+rst-substitutions = version
+extend-ignore = D401, D400, D100, RST305, RST219, D205, D202, D200, D204, RST206, W293, D403, D402, RST306
 
 [egg_info]
 tag_build =
gym_csle_stopping_game-0.2.20/src/gym_csle_stopping_game/__version__.py

@@ -0,0 +1 @@
+__version__ = '0.2.20'
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py

@@ -37,7 +37,6 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
         :param d: the dict to convert
         :return: the created instance
         """
-        defender_strategy = None
         try:
             defender_strategy = MultiThresholdStoppingPolicy.from_dict(d["defender_strategy"])
         except Exception:
@@ -54,9 +53,11 @@ class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
 
     def to_dict(self) -> Dict[str, Any]:
         """
+        Converts the object to a dict representation
+
         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["stopping_game_config"] = self.stopping_game_config.to_dict()
         d["defender_strategy"] = self.defender_strategy.to_dict()
         d["stopping_game_name"] = self.stopping_game_name
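The `d: Dict[str, Any] = {}` change above recurs throughout the DAO classes in this release; it exists purely to satisfy mypy, which cannot infer a type for an empty dict literal. A minimal standalone sketch of the pattern (illustrative only, not code from the package):

import sys
from typing import Any, Dict


def to_dict(obj_name: str, length: int) -> Dict[str, Any]:
    # Without the annotation, mypy reports: Need type annotation for "d" [var-annotated]
    d: Dict[str, Any] = {}
    d["name"] = obj_name   # str value
    d["L"] = length        # int value; Any makes mixed value types acceptable
    return d


if __name__ == "__main__":
    print(to_dict("stopping_game", 3), file=sys.stdout)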
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_config.py

@@ -1,6 +1,7 @@
 from typing import Dict, Any
 import gymnasium as gym
 import numpy as np
+import numpy.typing as npt
 from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig
 
 
@@ -10,8 +11,10 @@ class StoppingGameConfig(SimulationEnvInputConfig):
     """
 
     def __init__(self, env_name: str,
-                 T:
-
+                 T: npt.NDArray[Any], O: npt.NDArray[np.int_], Z: npt.NDArray[Any],
+                 R: npt.NDArray[Any], S: npt.NDArray[np.int_], A1: npt.NDArray[np.int_],
+                 A2: npt.NDArray[np.int_], L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int,
+                 b1: npt.NDArray[np.float_],
                  save_dir: str, checkpoint_traces_freq: int, gamma: float = 1) -> None:
         """
         Initializes the DTO
@@ -55,9 +58,11 @@ class StoppingGameConfig(SimulationEnvInputConfig):
 
     def to_dict(self) -> Dict[str, Any]:
         """
+        Converts the object to a dict representation
+
         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["T"] = list(self.T.tolist())
         d["O"] = list(self.O.tolist())
         d["Z"] = list(self.Z.tolist())
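The newly annotated constructor can be driven directly from the helper functions of StoppingGameUtil that are changed later in this diff. A minimal construction sketch follows; the reward constants, the intrusion probability p, the uniform Z tensor, the environment name and the save_dir path are made-up placeholders, not values shipped with the release:

import numpy as np
from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

L = 3                                                   # number of defender stop actions
O = StoppingGameUtil.observation_space(n=10)            # observations 0..10
S = StoppingGameUtil.state_space()                      # states 0, 1, 2
A1 = StoppingGameUtil.defender_actions()                # continue / stop
A2 = StoppingGameUtil.attacker_actions()                # continue / stop
# Placeholder observation tensor of shape |A1| x |A2| x |S| x |O| (uniform row distributions)
Z = np.ones((len(A1), len(A2), len(S), len(O))) / len(O)
config = StoppingGameConfig(
    env_name="csle-stopping-game-v1",                   # hypothetical environment name
    T=StoppingGameUtil.transition_tensor(L=L, p=0.01),
    O=O, Z=Z,
    R=StoppingGameUtil.reward_tensor(R_SLA=1, R_INT=-10, R_COST=-5, L=L, R_ST=20),
    S=S, A1=A1, A2=A2, L=L,
    R_INT=-10, R_COST=-5, R_SLA=1, R_ST=20,
    b1=StoppingGameUtil.b1(),
    save_dir="/tmp/stopping_game_traces",                # placeholder directory
    checkpoint_traces_freq=1000)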
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py

@@ -38,7 +38,6 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
         :param d: the dict to convert
         :return: the created instance
         """
-        attacker_strategy = None
         try:
             attacker_strategy = MultiThresholdStoppingPolicy.from_dict(d["attacker_strategy"])
         except Exception:
@@ -59,9 +58,11 @@ class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
 
     def to_dict(self) -> Dict[str, Any]:
         """
+        Converts the object to a dict representation
+
         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["stopping_game_config"] = self.stopping_game_config.to_dict()
         d["attacker_strategy"] = self.attacker_strategy.to_dict()
         d["stopping_game_name"] = self.stopping_game_name
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/dao/stopping_game_state.py

@@ -1,5 +1,6 @@
 from typing import Dict, Any
 import numpy as np
+import numpy.typing as npt
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 from csle_base.json_serializable import JSONSerializable
 
@@ -9,7 +10,7 @@ class StoppingGameState(JSONSerializable):
     Represents the state of the optimal stopping game
     """
 
-    def __init__(self, b1: np.
+    def __init__(self, b1: npt.NDArray[np.float_], L: int) -> None:
         """
         Intializes the state
 
@@ -34,13 +35,13 @@ class StoppingGameState(JSONSerializable):
         self.s = StoppingGameUtil.sample_initial_state(b1=self.b1)
         self.b = self.b1.copy()
 
-    def attacker_observation(self) ->
+    def attacker_observation(self) -> npt.NDArray[Any]:
         """
         :return: the attacker's observation
         """
         return np.array([self.l, self.b[1], self.s])
 
-    def defender_observation(self) ->
+    def defender_observation(self) -> npt.NDArray[Any]:
         """
         :return: the defender's observation
         """
@@ -69,9 +70,11 @@ class StoppingGameState(JSONSerializable):
 
     def to_dict(self) -> Dict[str, Any]:
        """
+        Converts the object to a dict representation
+
         :return: a dict representation of the object
         """
-        d = {}
+        d: Dict[str, Any] = {}
         d["L"] = self.L
         d["b1"] = list(self.b1)
         d["b"] = list(self.b)
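The new annotations make explicit that both observation methods return numpy arrays. A small usage sketch under that assumption (illustrative only; the defender observation layout is inferred from the d_obs[1] = b[1] assignment in the environment diff further down):

from gym_csle_stopping_game.dao.stopping_game_state import StoppingGameState
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

state = StoppingGameState(b1=StoppingGameUtil.b1(), L=3)
a_obs = state.attacker_observation()   # np.array([l, b[1], s]) per the hunk above
d_obs = state.defender_observation()   # numpy array; index 1 holds the belief b[1]
print(a_obs, d_obs)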
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_env.py

@@ -1,5 +1,6 @@
-from typing import Tuple, Dict,
+from typing import Tuple, Dict, List, Any
 import numpy as np
+import numpy.typing as npt
 import time
 import math
 import csle_common.constants.constants as constants
@@ -32,6 +33,11 @@ class StoppingGameEnv(BaseEnv):
     """
 
     def __init__(self, config: StoppingGameConfig):
+        """
+        Initializes the environment
+
+        :param config: the environment configuration
+        """
         self.config = config
 
         # Initialize environment state
@@ -54,15 +60,15 @@ class StoppingGameEnv(BaseEnv):
         }
 
         # Setup traces
-        self.traces = []
+        self.traces: List[SimulationTrace] = []
         self.trace = SimulationTrace(simulation_env=self.config.env_name)
 
         # Reset
         self.reset()
         super().__init__()
 
-    def step(self, action_profile: Tuple[int, Tuple[
-        -> Tuple[Tuple[
+    def step(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]]) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
@@ -76,7 +82,7 @@ class StoppingGameEnv(BaseEnv):
         assert pi2.shape[0] == len(self.config.S)
         assert pi2.shape[1] == len(self.config.A1)
         done = False
-        info = {}
+        info: Dict[str, Any] = {}
 
         # Compute r, s', b',o'
         r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -129,8 +135,8 @@ class StoppingGameEnv(BaseEnv):
 
         return (defender_obs, attacker_obs), (r, -r), done, done, info
 
-    def step_test(self, action_profile: Tuple[int, Tuple[
-        -> Tuple[Tuple[
+    def step_test(self, action_profile: Tuple[int, Tuple[npt.NDArray[Any], int]], sample_Z) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
@@ -144,7 +150,7 @@ class StoppingGameEnv(BaseEnv):
         assert pi2.shape[0] == len(self.config.S)
         assert pi2.shape[1] == len(self.config.A1)
         done = False
-        info = {}
+        info: Dict[str, Any] = {}
 
         # Compute r, s', b',o'
         r = self.config.R[self.state.l - 1][a1][a2][self.state.s]
@@ -196,10 +202,18 @@ class StoppingGameEnv(BaseEnv):
 
         return (defender_obs, attacker_obs), (r, -r), done, info
 
-    def step_trace(self, trace: EmulationTrace, a1: int, pi2:
-        -> Tuple[Tuple[
+    def step_trace(self, trace: EmulationTrace, a1: int, pi2: npt.NDArray[Any]) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Tuple[int, int], bool, Dict[str, Any]]:
+        """
+        Utility function for stepping a given trace
+
+        :param trace: the trace to step
+        :param a1: the action to step with
+        :param pi2: the policy of the attacker
+        :return: the result of the step
+        """
         done = False
-        info = {}
+        info: Dict[str, Any] = {}
         if (self.state.t - 1) < len(trace.attacker_actions):
             a2_emulation_action = trace.attacker_actions[self.state.t - 1]
             a2 = 0
@@ -259,13 +273,16 @@ class StoppingGameEnv(BaseEnv):
         if not done:
             self.trace.attacker_observations.append(attacker_obs)
             self.trace.defender_observations.append(defender_obs)
-
-        # Populate info
         info = self._info(info)
-
         return (defender_obs, attacker_obs), (r, -r), done, info
 
     def mean(self, prob_vector):
+        """
+        Utility function for getting the mean of a vector
+
+        :param prob_vector: the vector to take the mean of
+        :return: the mean
+        """
         m = 0
         for i in range(len(prob_vector)):
             m += prob_vector[i] * i
@@ -284,9 +301,10 @@ class StoppingGameEnv(BaseEnv):
         else:
             return 1 - (min(10, (first_stop - (intrusion_start + 1))) / 2) / 10
 
-    def _info(self, info) -> Dict[str,
+    def _info(self, info: Dict[str, Any]) -> Dict[str, Any]:
         """
         Adds the cumulative reward and episode length to the info dict
+
         :param info: the info dict to update
         :return: the updated info dict
         """
@@ -346,7 +364,8 @@ class StoppingGameEnv(BaseEnv):
             defender_baseline_stop_on_first_alert_return
         return info
 
-    def reset(self, seed: int = 0, soft: bool = False)
+    def reset(self, seed: int = 0, soft: bool = False) \
+            -> Tuple[Tuple[npt.NDArray[Any], npt.NDArray[Any]], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>
 
@@ -361,7 +380,7 @@ class StoppingGameEnv(BaseEnv):
         defender_obs = self.state.defender_observation()
         self.trace.attacker_observations.append(attacker_obs)
         self.trace.defender_observations.append(defender_obs)
-        info = {}
+        info: Dict[str, Any] = {}
         return (defender_obs, attacker_obs), info
 
     @staticmethod
@@ -371,6 +390,18 @@ class StoppingGameEnv(BaseEnv):
                              emulation_env_config: EmulationEnvConfig,
                              simulation_env_config: SimulationEnvConfig
                              ) -> List[EmulationSimulationTrace]:
+        """
+        Utility function for evaluating a strategy profile in the emulation environment
+
+        :param env: the environment to use for evaluation
+        :param n_episodes: the number of evaluation episodes
+        :param intrusion_seq: the intrusion sequence for the evaluation (sequence of attacker actions)
+        :param defender_policy: the defender policy for the evaluation
+        :param attacker_policy: the attacker policy for the evaluation
+        :param emulation_env_config: configuration of the emulation environment for the evaluation
+        :param simulation_env_config: configuration of the simulation environment for the evaluation
+        :return: traces with the evaluation results
+        """
         logger = Logger.__call__().get_logger()
         traces = []
         s = EmulationEnvState(emulation_env_config=emulation_env_config)
@@ -379,7 +410,7 @@ class StoppingGameEnv(BaseEnv):
             done = False
             defender_obs_space = simulation_env_config.joint_observation_space_config.observation_spaces[0]
             b = env.state.b1
-            o = env.reset()
+            o, _ = env.reset()
             (d_obs, a_obs) = o
             t = 0
             s.reset()
@@ -390,7 +421,7 @@ class StoppingGameEnv(BaseEnv):
             while not done:
                 a1 = defender_policy.action(d_obs)
                 a2 = attacker_policy.action(a_obs)
-                o, r, done, info = env.step((a1, a2))
+                o, r, done, info, _ = env.step((a1, a2))
                 (d_obs, a_obs) = o
                 r_1, r_2 = r
                 logger.debug(f"a1:{a1}, a2:{a2}, d_obs:{d_obs}, a_obs:{a_obs}, r:{r}, done:{done}, info: {info}")
@@ -419,12 +450,12 @@ class StoppingGameEnv(BaseEnv):
                              f"{defender_obs_space.observation_id_to_observation_vector_inv}")
                 logger.debug(f"observation_id_to_observation_vector_inv:"
                              f"{o_components_str in defender_obs_space.observation_id_to_observation_vector_inv}")
+                emulation_o = 0
                 if o_components_str in defender_obs_space.observation_id_to_observation_vector_inv:
-
-
-
-
-                    b = StoppingGameUtil.next_belief(o=o, a1=a1, b=b, pi2=a2, config=env.config, l=env.state.l, a2=a2)
+                    emulation_o = defender_obs_space.observation_id_to_observation_vector_inv[o_components_str]
+                    logger.debug(f"o:{emulation_o}")
+                    b = StoppingGameUtil.next_belief(o=emulation_o, a1=a1, b=b, pi2=a2, config=env.config,
+                                                     l=env.state.l, a2=a2)
                 d_obs[1] = b[1]
                 a_obs[1] = b[1]
                 logger.debug(f"b:{b}")
@@ -435,7 +466,7 @@ class StoppingGameEnv(BaseEnv):
             simulation_trace.infos.append(info)
             simulation_trace.states.append(s)
             simulation_trace.beliefs.append(b[1])
-            simulation_trace.infrastructure_metrics.append(
+            simulation_trace.infrastructure_metrics.append(emulation_o)
 
             em_sim_trace = EmulationSimulationTrace(emulation_trace=emulation_trace, simulation_trace=simulation_trace)
             MetastoreFacade.save_emulation_simulation_trace(em_sim_trace)
@@ -527,10 +558,10 @@ class StoppingGameEnv(BaseEnv):
             stage_policy = []
             for s in self.config.S:
                 if s != 2:
-                    dist = [0, 0]
-                    dist[a2] = 1
+                    dist = [0.0, 0.0]
+                    dist[a2] = 1.0
                     stage_policy.append(dist)
                 else:
                     stage_policy.append([0.5, 0.5])
-
-            _, _, done, _ = self.step(action_profile=(a1, (
+            pi2 = np.array(stage_policy)
+            _, _, done, _, _ = self.step(action_profile=(a1, (pi2, a2)))
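The manual-play branch above shows the shape that step() now expects for the attacker side: a stage policy (one distribution over continue/stop per state) together with the sampled action, and a gymnasium-style five-element return value. A self-contained rollout sketch along the same lines; the config object is assumed to come from a construction like the one sketched earlier, and the defender's random action choice is a placeholder:

import numpy as np
from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil


def random_rollout(env: StoppingGameEnv) -> float:
    """Plays one episode with random defender stop/continue decisions; illustrative only."""
    (defender_obs, attacker_obs), info = env.reset()          # reset() returns (obs, info)
    pi2 = np.array([[0.9, 0.1], [0.9, 0.1], [0.5, 0.5]])      # attacker stage policy, |S| x 2
    cumulative_defender_reward, done = 0.0, False
    while not done:
        a1 = int(np.random.choice([0, 1]))                    # defender: continue or stop
        a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=env.state.s)
        obs, (r1, r2), terminated, truncated, info = env.step(action_profile=(a1, (pi2, a2)))
        (defender_obs, attacker_obs) = obs
        cumulative_defender_reward += r1
        done = terminated or truncated
    return cumulative_defender_reward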
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py

@@ -1,6 +1,6 @@
 from typing import Tuple, List, Union, Dict, Any
-import gymnasium as gym
 import numpy as np
+import numpy.typing as npt
 import torch
 import math
 from csle_common.dao.simulation_config.base_env import BaseEnv
@@ -9,6 +9,7 @@ from gym_csle_stopping_game.dao.stopping_game_attacker_mdp_config import Stoppin
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
 from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
 import gym_csle_stopping_game.constants.constants as env_constants
+from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
 
 
 class StoppingGameMdpAttackerEnv(BaseEnv):
@@ -23,7 +24,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         :param config: the configuration of the environment
         """
         self.config = config
-        self.stopping_game_env =
+        self.stopping_game_env: StoppingGameEnv = StoppingGameEnv(config=self.config.stopping_game_config)
 
         # Setup spaces
         self.observation_space = self.config.stopping_game_config.attacker_observation_space()
@@ -33,41 +34,47 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.static_defender_strategy = self.config.defender_strategy
 
         # Setup Config
-        self.viewer = None
+        self.viewer: Union[None, Any] = None
         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': 50  # Video rendering speed
         }
 
-        self.latest_defender_obs = None
-        self.latest_attacker_obs = None
-        self.model = None
+        self.latest_defender_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+        self.latest_attacker_obs: Union[None, List[Any], npt.NDArray[Any]] = None
+        self.model: Union[None, Any] = None
 
         # Reset
         self.reset()
         super().__init__()
 
-    def step(self, pi2: Union[
-        -> Tuple[
+    def step(self, pi2: Union[npt.NDArray[Any], int, float, np.int_, np.float_]) \
+            -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
         :param pi2: attacker stage policy
         :return: (obs, reward, terminated, truncated, info)
         """
-        if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is
-                or type(pi2) is np.float64:
+        if type(pi2) is int or type(pi2) is float or type(pi2) is np.int64 or type(pi2) is np.float64:
             a2 = pi2
-
+            if self.latest_attacker_obs is None:
+                raise ValueError("Attacker observation is None")
+            pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs), a2=int(a2))
         else:
             if self.model is not None:
-
+                if self.latest_attacker_obs is None:
+                    raise ValueError("Attacker observation is None")
+                pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
                 a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
             else:
                 pi2 = np.array(pi2)
-
-
-
+                try:
+                    if self.latest_attacker_obs is None:
+                        raise ValueError("Attacker observation is None")
+                    pi2 = self.calculate_stage_policy(o=list(self.latest_attacker_obs))
+                except Exception:
+                    pass
                 a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
 
         # a2 = pi2
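As the reworked branching shows, StoppingGameMdpAttackerEnv.step accepts either a raw attacker action (int/float) or a full stage-policy array, and derives the missing piece from the latest attacker observation. A short sketch of both call styles; the env object is assumed to be already constructed from a StoppingGameAttackerMdpConfig:

import numpy as np
from gym_csle_stopping_game.envs.stopping_game_mdp_attacker_env import StoppingGameMdpAttackerEnv


def demo_step_styles(env: StoppingGameMdpAttackerEnv) -> None:
    """Illustrates the two accepted pi2 argument styles; not part of the package."""
    attacker_obs, info = env.reset()
    # Style 1: a raw action index; the env reconstructs the stage policy from the latest observation
    attacker_obs, reward, terminated, truncated, info = env.step(pi2=1)
    # Style 2: an explicit |S| x |A2| stage policy array
    # (it may be recomputed from the attacker model if one has been set via set_model)
    pi2 = np.array([[0.9, 0.1], [0.9, 0.1], [0.5, 0.5]])
    attacker_obs, reward, terminated, truncated, info = env.step(pi2=pi2)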
@@ -83,7 +90,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         a1 = self.static_defender_strategy.action(o=self.latest_defender_obs)
 
         # Step the game
-        o, r, d, _, info = self.stopping_game_env.step((a1, (pi2, a2)))
+        o, r, d, _, info = self.stopping_game_env.step((int(a1), (pi2, int(a2))))
         self.latest_defender_obs = o[0]
         self.latest_attacker_obs = o[1]
         attacker_obs = o[1]
@@ -94,7 +101,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
 
         return attacker_obs, r[1], d, d, info
 
-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[
+    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>
 
@@ -104,7 +111,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         self.latest_defender_obs = o[0]
         self.latest_attacker_obs = o[1]
         attacker_obs = o[1]
-        info = {}
+        info: Dict[str, Any] = {}
         return attacker_obs, info
 
     def set_model(self, model) -> None:
@@ -116,7 +123,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         """
         self.model = model
 
-    def calculate_stage_policy(self, o: List, a2: int = 0) ->
+    def calculate_stage_policy(self, o: List[Any], a2: int = 0) -> npt.NDArray[Any]:
         """
         Calculates the stage policy of a given model and observation
 
@@ -127,15 +134,14 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
             stage_policy = []
             for s in self.config.stopping_game_config.S:
                 if s != 2:
-                    dist = [0, 0]
-                    dist[a2] = 1
+                    dist = [0.0, 0.0]
+                    dist[a2] = 1.0
                     stage_policy.append(dist)
                 else:
                     stage_policy.append([0.5, 0.5])
             return np.array(stage_policy)
         if isinstance(self.model, MixedMultiThresholdStoppingPolicy):
-
-            return stage_policy
+            return np.array(self.model.stage_policy(o=o))
         else:
             b1 = o[1]
             l = int(o[0])
@@ -146,18 +152,19 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
                 stage_policy.append(self._get_attacker_dist(obs=o))
             else:
                 stage_policy.append([0.5, 0.5])
-
-        return stage_policy
+        return np.array(stage_policy)
 
-    def _get_attacker_dist(self, obs: List) -> List:
+    def _get_attacker_dist(self, obs: List[Any]) -> List[float]:
         """
         Utility function for getting the attacker's action distribution based on a given observation
 
         :param obs: the given observation
         :return: the action distribution
         """
-
-
+        np_obs = np.array([obs])
+        if self.model is None:
+            raise ValueError("Model is None")
+        actions, values, log_prob = self.model.policy.forward(obs=torch.tensor(np_obs).to(self.model.device))
         action = actions[0]
         if action == 1:
             stop_prob = math.exp(log_prob)
@@ -211,7 +218,7 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
         Closes the viewer (cleanup)
         :return: None
         """
-        if self.viewer:
+        if self.viewer is not None:
             self.viewer.close()
             self.viewer = None
 
@@ -244,4 +251,4 @@ class StoppingGameMdpAttackerEnv(BaseEnv):
                 self.reset()
             else:
                 action_idx = int(raw_input)
-                _, _, done, _ = self.step(pi2=action_idx)
+                _, _, done, _, _ = self.step(pi2=action_idx)
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py

@@ -1,6 +1,6 @@
-from typing import Tuple, List, Dict, Any
-import gymnasium as gym
+from typing import Tuple, List, Dict, Any, Union
 import numpy as np
+import numpy.typing as npt
 from csle_common.dao.simulation_config.base_env import BaseEnv
 from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
@@ -27,7 +27,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         :param attacker_strategy: the strategy of the static attacker
         """
         self.config = config
-        self.stopping_game_env =
+        self.stopping_game_env = StoppingGameEnv(config=self.config.stopping_game_config)
 
         # Setup spaces
         self.observation_space = self.config.stopping_game_config.defender_observation_space()
@@ -37,18 +37,18 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         self.static_attacker_strategy = self.config.attacker_strategy
 
         # Setup Config
-        self.viewer = None
+        self.viewer: Union[None, Any] = None
         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': 50  # Video rendering speed
         }
 
-        self.latest_attacker_obs = None
+        self.latest_attacker_obs: Union[None, npt.NDArray[Any]] = None
         # Reset
         self.reset()
         super().__init__()
 
-    def step(self, a1: int) -> Tuple[
+    def step(self, a1: int) -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
@@ -66,7 +66,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
 
         return defender_obs, r[0], d, d, info
 
-    def step_test(self, a1: int, sample_Z) -> Tuple[
+    def step_test(self, a1: int, sample_Z) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
         """
         Takes a step in the environment by executing the given action
 
@@ -84,7 +84,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
 
         return defender_obs, r[0], d, info
 
-    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[
+    def reset(self, seed: int = 0, soft: bool = False) -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
         """
         Resets the environment state, this should be called whenever step() returns <done>
 
@@ -93,7 +93,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         o, _ = self.stopping_game_env.reset()
         self.latest_attacker_obs = o[1]
         defender_obs = o[0]
-        dict = {}
+        dict: Dict[str, Any] = {}
         return defender_obs, dict
 
     def render(self, mode: str = 'human'):
@@ -105,7 +105,14 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         """
         raise NotImplementedError("Rendering is not implemented for this environment")
 
-    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[
+    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
+        """
+        Utility method for stopping a pre-recorded trace
+
+        :param trace: the trace to step
+        :param a1: the action to step with
+        :return: the result of the step according to the trace
+        """
         pi2 = np.array(self.static_attacker_strategy.stage_policy(self.latest_attacker_obs))
         o, r, d, info = self.stopping_game_env.step_trace(trace=trace, a1=a1, pi2=pi2)
         self.latest_attacker_obs = o[1]
@@ -118,6 +125,17 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
                              defender_policy: Policy,
                              emulation_env_config: EmulationEnvConfig, simulation_env_config: SimulationEnvConfig) \
             -> List[EmulationSimulationTrace]:
+        """
+        Utility function for evaluating policies in the emulation environment
+
+        :param env: the environment to use for evaluation
+        :param n_episodes: the number of episodes to use for evaluation
+        :param intrusion_seq: the sequence of intrusion actions to use for evaluation
+        :param defender_policy: the defender policy to use for evaluation
+        :param emulation_env_config: the configuration of the emulation environment to use for evaluation
+        :param simulation_env_config: the configuration of the simulation environment to use for evaluation
+        :return: traces with the evaluation results
+        """
         return StoppingGameEnv.emulation_evaluation(
             env=env.stopping_game_env, n_episodes=n_episodes, intrusion_seq=intrusion_seq,
             defender_policy=defender_policy, attacker_policy=env.static_attacker_strategy,
@@ -160,7 +178,7 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
         Closes the viewer (cleanup)
         :return: None
         """
-        if self.viewer:
+        if self.viewer is not None:
             self.viewer.close()
             self.viewer = None
 
@@ -193,4 +211,4 @@ class StoppingGamePomdpDefenderEnv(BaseEnv):
                 self.reset()
             else:
                 action_idx = int(raw_input)
-                _, _, done, _ = self.step(
+                _, _, done, _, _ = self.step(a1=action_idx)
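The defender-side environment follows the same gymnasium conventions after these changes: reset() returns (observation, info) and step() returns a five-element tuple. A minimal interaction sketch; the env is assumed to be built from a StoppingGameDefenderPomdpConfig:

from gym_csle_stopping_game.envs.stopping_game_pomdp_defender_env import StoppingGamePomdpDefenderEnv


def defender_episode(env: StoppingGamePomdpDefenderEnv) -> float:
    """One episode where the defender stops at every opportunity (a1=1); illustrative only."""
    defender_obs, info = env.reset()
    total, done = 0.0, False
    while not done:
        defender_obs, reward, terminated, truncated, info = env.step(a1=1)
        total += reward
        done = terminated or truncated
    return total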
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game/util/stopping_game_util.py

@@ -1,5 +1,6 @@
-from typing import List, Dict, Tuple
+from typing import List, Dict, Tuple, Any
 import numpy as np
+import numpy.typing as npt
 from scipy.stats import betabinom
 from csle_common.dao.system_identification.emulation_statistics import EmulationStatistics
 from csle_common.dao.simulation_config.observation_space_config import ObservationSpaceConfig
@@ -14,26 +15,37 @@ class StoppingGameUtil:
     """
 
     @staticmethod
-    def b1() -> np.
+    def b1() -> npt.NDArray[np.int_]:
        """
+        Gets the initial belief
+
         :return: the initial belief
         """
         return np.array([1, 0, 0])
 
     @staticmethod
     def state_space():
+        """
+        Gets the state space
+
+        :return: the state space of the game
+        """
         return np.array([0, 1, 2])
 
     @staticmethod
-    def defender_actions() -> np.
+    def defender_actions() -> npt.NDArray[np.int_]:
         """
+        Gets the action space of the defender
+
         :return: the action space of the defender
         """
         return np.array([0, 1])
 
     @staticmethod
-    def attacker_actions() -> np.
+    def attacker_actions() -> npt.NDArray[np.int_]:
         """
+        Gets the action space of the attacker
+
         :return: the action space of the attacker
         """
         return np.array([0, 1])
@@ -44,13 +56,15 @@ class StoppingGameUtil:
         Returns the observation space of size n
 
         :param n: the maximum observation
-        :return:
+        :return: the observation space
         """
         return np.array(list(range(n + 1)))
 
     @staticmethod
-    def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) ->
+    def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> npt.NDArray[Any]:
         """
+        Gets the reward tensor
+
         :param R_SLA: the R_SLA constant
         :param R_INT: the R_INT constant
         :param R_COST: the R_COST constant
@@ -76,12 +90,13 @@ class StoppingGameUtil:
             ]
         ]
         R_l.append(R)
-
-        return R
+        return np.array(R_l)
 
     @staticmethod
-    def transition_tensor(L: int, p: float) ->
+    def transition_tensor(L: int, p: float) -> npt.NDArray[Any]:
         """
+        Gets the transition tensor
+
         :param L: the maximum number of stop actions
         :return: a |L|x|A1|x|A2||S|^2 tensor
         """
@@ -156,15 +171,14 @@ class StoppingGameUtil:
             ]
         ]
         T_l.append(T)
-
-        return T
+        return np.array(T_l)
 
     @staticmethod
     def observation_tensor_from_emulation_statistics(emulation_statistic: EmulationStatistics,
                                                      observation_space_defender: ObservationSpaceConfig,
                                                      joint_action_space: JointActionSpaceConfig,
                                                      state_space: StateSpaceConfig) \
-            -> Tuple[
+            -> Tuple[npt.NDArray[Any], Dict[str, List[Any]]]:
         """
         Returns an observation tensor based on measured emulation statistics
 
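The fix above is worth calling out: reward_tensor and transition_tensor previously returned only the last per-level slice (R / T) instead of the stacked array over all L levels, which is what StoppingGameEnv.step indexes. A quick shape check under arbitrary placeholder values for L and p (not part of the package):

from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

L = 3
T = StoppingGameUtil.transition_tensor(L=L, p=0.01)
R = StoppingGameUtil.reward_tensor(R_SLA=1, R_INT=-10, R_COST=-5, L=L, R_ST=20)
# StoppingGameEnv.step indexes these as T[l - 1][a1][a2][s][s_prime] and R[l - 1][a1][a2][s],
# so the leading dimension must be L rather than a single slice.
assert T.shape[0] == L and R.shape[0] == L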
@@ -174,9 +188,9 @@ class StoppingGameUtil:
         :param state_space: the state space
         :return: a |A1|x|A2|x|S|x|O| tensor
         """
-        intrusion_severe_alerts_probabilities = []
-        intrusion_warning_alerts_probabilities = []
-        intrusion_login_attempts_probabilities = []
+        intrusion_severe_alerts_probabilities: List[float] = []
+        intrusion_warning_alerts_probabilities: List[float] = []
+        intrusion_login_attempts_probabilities: List[float] = []
         norm = sum(emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"].values())
         for severe_alert_obs in observation_space_defender.component_observations["severe_alerts"]:
             count = emulation_statistic.conditionals_counts["intrusion"]["severe_alerts"][severe_alert_obs.id]
@@ -214,14 +228,14 @@ class StoppingGameUtil:
         login_attempts_a1_a2_s_o_dist = []
         for a2 in range(len(joint_action_space.action_spaces[1].actions)):
             a2_s_o_dist = []
-            severe_alerts_a2_s_o_dist = []
-            warning_alerts_a2_s_o_dist = []
-            login_attempts_a2_s_o_dist = []
+            severe_alerts_a2_s_o_dist: List[List[float]] = []
+            warning_alerts_a2_s_o_dist: List[List[float]] = []
+            login_attempts_a2_s_o_dist: List[List[float]] = []
             for s in range(len(state_space.states)):
                 s_o_dist = []
-                severe_alerts_s_o_dist = []
-                warning_alerts_s_o_dist = []
-                login_attempts_s_o_dist = []
+                severe_alerts_s_o_dist: List[float] = []
+                warning_alerts_s_o_dist: List[float] = []
+                login_attempts_s_o_dist: List[float] = []
                 for o in range(len(observation_space_defender.observations)):
                     obs_vector = observation_space_defender.observation_id_to_observation_id_vector[o]
                     if s == 0:
@@ -241,8 +255,8 @@ class StoppingGameUtil:
                     s_o_dist.append(p)
                 a2_s_o_dist.append(s_o_dist)
                 severe_alerts_a2_s_o_dist.append(severe_alerts_s_o_dist)
-                warning_alerts_a2_s_o_dist.append(
-                login_attempts_a2_s_o_dist.append(
+                warning_alerts_a2_s_o_dist.append(warning_alerts_s_o_dist)
+                login_attempts_a2_s_o_dist.append(login_attempts_s_o_dist)
             a1_a2_s_o_dist.append(a2_s_o_dist)
             severe_alerts_a1_a2_s_o_dist.append(severe_alerts_a2_s_o_dist)
             warning_alerts_a1_a2_s_o_dist.append(warning_alerts_a2_s_o_dist)
@@ -301,7 +315,7 @@ class StoppingGameUtil:
         return Z
 
     @staticmethod
-    def sample_next_state(T:
+    def sample_next_state(T: npt.NDArray[Any], l: int, s: int, a1: int, a2: int, S: npt.NDArray[np.int_]) -> int:
         """
         Samples the next state
 
@@ -316,22 +330,20 @@ class StoppingGameUtil:
         state_probs = []
         for s_prime in S:
             state_probs.append(T[l - 1][a1][a2][s][s_prime])
-
-        return s_prime
+        return int(np.random.choice(np.arange(0, len(S)), p=state_probs))
 
     @staticmethod
-    def sample_initial_state(b1: np.
+    def sample_initial_state(b1: npt.NDArray[np.float_]) -> int:
         """
         Samples the initial state
 
         :param b1: the initial belief
         :return: s1
         """
-
-        return s1
+        return int(np.random.choice(np.arange(0, len(b1)), p=b1))
 
     @staticmethod
-    def sample_next_observation(Z:
+    def sample_next_observation(Z: npt.NDArray[Any], s_prime: int, O: npt.NDArray[np.int_]) -> int:
         """
         Samples the next observation
 
@@ -351,7 +363,7 @@ class StoppingGameUtil:
         return int(o)
 
     @staticmethod
-    def bayes_filter(s_prime: int, o: int, a1: int, b: np.
+    def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any], l: int,
                      config: StoppingGameConfig) -> float:
         """
         A Bayesian filter to compute the belief of player 1
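The sampling fixes above all reduce to the same idiom: drawing an index from a categorical distribution with numpy (the old code returned the loop variable or an undefined name instead of actually sampling). A standalone sketch of that idiom (not package code):

import numpy as np


def sample_categorical(probs: np.ndarray) -> int:
    """Draws an index in 0..len(probs)-1 with the given probabilities (they must sum to 1)."""
    return int(np.random.choice(np.arange(0, len(probs)), p=probs))


# e.g. the initial belief b1 = [1, 0, 0] always yields state 0
s1 = sample_categorical(np.array([1.0, 0.0, 0.0]))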
@@ -389,7 +401,7 @@ class StoppingGameUtil:
         return b_prime_s_prime
 
     @staticmethod
-    def p_o_given_b_a1_a2(o: int, b: List, a1: int, a2: int, config: StoppingGameConfig) -> float:
+    def p_o_given_b_a1_a2(o: int, b: List[float], a1: int, a2: int, config: StoppingGameConfig) -> float:
         """
         Computes P[o|a,b]
 
@@ -408,8 +420,8 @@ class StoppingGameUtil:
         return prob
 
     @staticmethod
-    def next_belief(o: int, a1: int, b: np.
-                    a2: int = 0, s: int = 0) -> np.
+    def next_belief(o: int, a1: int, b: npt.NDArray[np.float_], pi2: npt.NDArray[Any],
+                    config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float_]:
         """
         Computes the next belief using a Bayesian filter
 
@@ -434,7 +446,7 @@ class StoppingGameUtil:
         return b_prime
 
     @staticmethod
-    def sample_attacker_action(pi2:
+    def sample_attacker_action(pi2: npt.NDArray[Any], s: int) -> int:
         """
         Samples the attacker action
 
@@ -442,5 +454,4 @@ class StoppingGameUtil:
         :param s: the game state
         :return: a2 (the attacker action
         """
-
-        return a2
+        return int(np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s]))
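next_belief wraps the Bayesian filter above: given the current belief b, the last defender action a1, the attacker stage policy pi2, the stops-left counter l and the new observation o, it returns the updated belief over the three states. A hedged usage sketch; the config object is assumed to exist (see the construction sketch earlier) and the observation value and l are arbitrary placeholders:

import numpy as np
from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil


def belief_update_example(config: StoppingGameConfig) -> np.ndarray:
    """Performs one belief update starting from the initial belief; illustrative only."""
    b = StoppingGameUtil.b1()                               # initial belief [1, 0, 0]
    pi2 = np.array([[0.9, 0.1], [0.9, 0.1], [0.5, 0.5]])    # attacker stage policy
    b_prime = StoppingGameUtil.next_belief(o=5, a1=0, b=b, pi2=pi2, config=config, l=3, a2=0)
    return b_prime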
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-csle-stopping-game
-Version: 0.2.18
+Version: 0.2.20
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com
@@ -15,6 +15,3 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Intended Audience :: Science/Research
 Requires-Python: >=3.8
 Provides-Extra: testing
-
-UNKNOWN
-
{gym_csle_stopping_game-0.2.18 → gym_csle_stopping_game-0.2.20}/src/gym_csle_stopping_game.egg-info/SOURCES.txt

@@ -21,4 +21,5 @@ src/gym_csle_stopping_game/envs/stopping_game_env.py
 src/gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py
 src/gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py
 src/gym_csle_stopping_game/util/__init__.py
-src/gym_csle_stopping_game/util/stopping_game_util.py
+src/gym_csle_stopping_game/util/stopping_game_util.py
+tests/test_stopping_game_util.py
gym_csle_stopping_game-0.2.20/src/gym_csle_stopping_game.egg-info/requires.txt

@@ -0,0 +1,28 @@
+gymnasium>=0.27.1
+csle-base>=0.2.20
+csle-common>=0.2.20
+csle-attacker>=0.2.20
+csle-defender>=0.2.20
+csle-collector>=0.2.9
+
+[testing]
+pytest>=6.0
+pytest-cov>=2.0
+pytest-mock>=3.6.0
+pytest-grpc>=0.8.0
+mypy>=1.4.1
+mypy-extensions>=1.0.0
+mypy-protobuf>=3.5.0
+types-PyYAML>=6.0.12.11
+types-paramiko>=3.2.0.0
+types-protobuf>=4.23.0.3
+types-requests>=2.31.0.1
+types-urllib3>=1.26.25.13
+flake8>=3.9
+flake8-rst-docstrings>=0.3.0
+tox>=3.24
+sphinx>=5.3.0
+sphinxcontrib-napoleon>=0.7
+sphinx-rtd-theme>=1.1.1
+twine>=4.0.2
+build>=0.10.0
gym_csle_stopping_game-0.2.20/tests/test_stopping_game_util.py

@@ -0,0 +1,19 @@
+import logging
+import pytest
+from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
+
+
+class TestStoppingGameUtilSuite(object):
+    """
+    Test suite for stopping_game_util.py
+    """
+
+    pytest.logger = logging.getLogger("stopping_game_util_tests")
+
+    def test_b1(self) -> None:
+        """
+        Tests the b1 function
+
+        :return: None
+        """
+        assert sum(StoppingGameUtil.b1()) == 1
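The new test suite is minimal: a single check that the initial belief sums to one. If you want to extend it locally, two more checks in the same style that follow directly from the util functions shown in this diff (this extra suite is illustrative and not part of the release):

from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil


class TestStoppingGameUtilExtraSuite(object):
    """Additional checks in the style of the released test suite; illustrative only."""

    def test_action_spaces(self) -> None:
        """Both players have the two actions continue (0) and stop (1)."""
        assert list(StoppingGameUtil.defender_actions()) == [0, 1]
        assert list(StoppingGameUtil.attacker_actions()) == [0, 1]

    def test_state_space(self) -> None:
        """The game has the three states 0 (no intrusion), 1 (intrusion), 2 (terminal)."""
        assert list(StoppingGameUtil.state_space()) == [0, 1, 2]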
gym_csle_stopping_game-0.2.18/src/gym_csle_stopping_game/__version__.py

@@ -1 +0,0 @@
-__version__ = '0.2.18'
gym_csle_stopping_game-0.2.18/src/gym_csle_stopping_game.egg-info/requires.txt

@@ -1,20 +0,0 @@
-gymnasium>=0.27.1
-csle-base>=0.2.18
-csle-common>=0.2.18
-csle-attacker>=0.2.18
-csle-defender>=0.2.18
-csle-collector>=0.2.9
-
-[testing]
-pytest>=6.0
-pytest-cov>=2.0
-pytest-mock>=3.6.0
-pytest-grpc>=0.8.0
-mypy>=1.3.0
-flake8>=3.9
-tox>=3.24
-sphinx>=5.3.0
-sphinxcontrib-napoleon>=0.7
-sphinx-rtd-theme>=1.1.1
-twine>=4.0.2
-build>=0.10.0

The remaining files listed above with +0 -0 are unchanged between the two versions.