gym-csle-stopping-game 0.9.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_csle_stopping_game/__init__.py +23 -0
- gym_csle_stopping_game/__version__.py +1 -0
- gym_csle_stopping_game/constants/__init__.py +0 -0
- gym_csle_stopping_game/constants/constants.py +40 -0
- gym_csle_stopping_game/dao/__init__.py +0 -0
- gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py +86 -0
- gym_csle_stopping_game/dao/stopping_game_config.py +165 -0
- gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py +92 -0
- gym_csle_stopping_game/dao/stopping_game_state.py +98 -0
- gym_csle_stopping_game/envs/__init__.py +1 -0
- gym_csle_stopping_game/envs/stopping_game_env.py +393 -0
- gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py +282 -0
- gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py +233 -0
- gym_csle_stopping_game/util/__init__.py +0 -0
- gym_csle_stopping_game/util/stopping_game_util.py +699 -0
- gym_csle_stopping_game-0.9.24.dist-info/METADATA +414 -0
- gym_csle_stopping_game-0.9.24.dist-info/RECORD +19 -0
- gym_csle_stopping_game-0.9.24.dist-info/WHEEL +5 -0
- gym_csle_stopping_game-0.9.24.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Register OpenAI Envs
|
|
3
|
+
"""
|
|
4
|
+
from . __version__ import __version__
|
|
5
|
+
from gymnasium.envs.registration import register
|
|
6
|
+
|
|
7
|
+
# Register the three stopping-game environments with Gymnasium.
# Each entry is (environment id, "module:Class" entry point); every
# registration supplies ``config=None`` as its default keyword argument.
for _env_id, _entry_point in (
    ('csle-stopping-game-v1',
     'gym_csle_stopping_game.envs.stopping_game_env:StoppingGameEnv'),
    ('csle-stopping-game-mdp-attacker-v1',
     'gym_csle_stopping_game.envs.stopping_game_mdp_attacker_env:StoppingGameMdpAttackerEnv'),
    ('csle-stopping-game-pomdp-defender-v1',
     'gym_csle_stopping_game.envs.stopping_game_pomdp_defender_env:StoppingGamePomdpDefenderEnv'),
):
    # A fresh kwargs dict is built per registration so the specs share no state.
    register(id=_env_id, entry_point=_entry_point, kwargs={'config': None})

# Do not leave the loop variables behind as package attributes.
del _env_id, _entry_point
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Release version of the gym-csle-stopping-game package.
__version__ = "0.9.24"
|
|
File without changes
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Constants for gym-csle-stopping-game
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class STATIC_DEFENDER_STRATEGIES:
    """
    Namespace of string identifiers for the static strategies of the defender
    """
    # Identifier of the random strategy
    RANDOM = "random"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class STATIC_ATTACKER_STRATEGIES:
    """
    Namespace of string identifiers for the static strategies of the attacker
    """
    # Identifier of the random strategy
    RANDOM = "random"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ENV_METRICS:
    """
    Namespace of string keys that identify the metrics of the environment
    """
    # Keys describing the intrusion and how well it was predicted
    INTRUSION_LENGTH = "intrusion_length"
    INTRUSION_START = "intrusion_start"
    WEIGHTED_INTRUSION_PREDICTION_DISTANCE = "weighted_intrusion_prediction_distance"
    START_POINT_CORRECT = "start_point_correct"
    INTRUSION_END = "intrusion_end"

    # Short keys for episode-level and step-level quantities
    RETURN = "R"
    TIME_HORIZON = "T"
    STOP = "stop"
    STOPS_REMAINING = "l"
    STATE = "s"
    DEFENDER_ACTION = "a1"
    ATTACKER_ACTION = "a2"
    OBSERVATION = "o"
    BELIEF = "b"
    TIME_STEP = "t"

    # Keys for the reference returns
    AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN = "average_defender_baseline_stop_on_first_alert_return"
    AVERAGE_UPPER_BOUND_RETURN = "average_upper_bound_return"
|
|
File without changes
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from typing import Dict, Any
|
|
2
|
+
from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
|
|
3
|
+
from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig
|
|
4
|
+
from csle_common.dao.training.policy import Policy
|
|
5
|
+
from csle_common.dao.training.random_policy import RandomPolicy
|
|
6
|
+
from csle_common.dao.training.multi_threshold_stopping_policy import MultiThresholdStoppingPolicy
|
|
7
|
+
from csle_common.dao.training.ppo_policy import PPOPolicy
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class StoppingGameAttackerMdpConfig(SimulationEnvInputConfig):
    """
    DTO class representing the configuration of the MDP environment of the attacker
    when facing a static defender policy
    """

    def __init__(self, env_name: str, stopping_game_config: StoppingGameConfig, defender_strategy: Policy,
                 stopping_game_name: str = "csle-stopping-game-v1") -> None:
        """
        Initializes the DTO

        :param env_name: the environment name
        :param stopping_game_config: the underlying stopping game config
        :param defender_strategy: the static defender strategy (a policy object)
        :param stopping_game_name: the underlying stopping game name
        """
        super().__init__()
        self.env_name = env_name
        self.stopping_game_config = stopping_game_config
        self.defender_strategy = defender_strategy
        self.stopping_game_name = stopping_game_name

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "StoppingGameAttackerMdpConfig":
        """
        Converts a dict representation to an instance

        The serialized defender strategy is parsed by trying the policy classes in a fixed order:
        MultiThresholdStoppingPolicy, RandomPolicy and finally PPOPolicy. If the last candidate
        also fails, its exception propagates to the caller.

        :param d: the dict to convert
        :return: the created instance
        """
        serialized_strategy = d["defender_strategy"]
        defender_strategy: Policy
        for policy_class in (MultiThresholdStoppingPolicy, RandomPolicy):
            try:
                defender_strategy = policy_class.from_dict(serialized_strategy)
                break
            except Exception:
                # This class could not parse the dict; fall through to the next candidate
                continue
        else:
            # Last resort: a failure here is deliberately not swallowed
            defender_strategy = PPOPolicy.from_dict(serialized_strategy)
        obj = StoppingGameAttackerMdpConfig(
            stopping_game_config=StoppingGameConfig.from_dict(d["stopping_game_config"]),
            defender_strategy=defender_strategy,
            stopping_game_name=d["stopping_game_name"], env_name=d["env_name"]
        )
        return obj

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the object to a dict representation

        :return: a dict representation of the object
        """
        d: Dict[str, Any] = {}
        d["stopping_game_config"] = self.stopping_game_config.to_dict()
        d["defender_strategy"] = self.defender_strategy.to_dict()
        d["stopping_game_name"] = self.stopping_game_name
        d["env_name"] = self.env_name
        return d

    def __str__(self) -> str:
        """
        :return: a string representation of the object
        """
        return f"stopping_game_config: {self.stopping_game_config}, defender_strategy:{self.defender_strategy}, " \
               f"stopping_game_name:{self.stopping_game_name}, env_name: {self.env_name}"

    @staticmethod
    def from_json_file(json_file_path: str) -> "StoppingGameAttackerMdpConfig":
        """
        Reads a json file and converts it to a DTO

        :param json_file_path: the json file path
        :return: the converted DTO
        """
        import io
        import json
        # JSON text is UTF-8; do not depend on the locale's default encoding
        with io.open(json_file_path, 'r', encoding="utf-8") as f:
            json_str = f.read()
        return StoppingGameAttackerMdpConfig.from_dict(json.loads(json_str))
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
from typing import Dict, Any
|
|
2
|
+
import gymnasium as gym
|
|
3
|
+
import numpy as np
|
|
4
|
+
import numpy.typing as npt
|
|
5
|
+
from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StoppingGameConfig(SimulationEnvInputConfig):
    """
    DTO class containing the configuration of the stopping game
    """

    def __init__(self, env_name: str,
                 T: npt.NDArray[Any], O: npt.NDArray[np.int32], Z: npt.NDArray[Any],
                 R: npt.NDArray[Any], S: npt.NDArray[np.int32], A1: npt.NDArray[np.int32],
                 A2: npt.NDArray[np.int32], L: int, R_INT: int, R_COST: int, R_SLA: int, R_ST: int,
                 b1: npt.NDArray[np.float64],
                 save_dir: str, checkpoint_traces_freq: int, gamma: float = 1, compute_beliefs: bool = True,
                 save_trace: bool = True) -> None:
        """
        Initializes the DTO

        :param env_name: the name of the environment
        :param T: the transition tensor
        :param O: the observation space
        :param Z: the observation tensor
        :param R: the reward function
        :param S: the state space
        :param A1: the action space of the defender
        :param A2: the action space of the attacker
        :param L: the maximum number of stops of the defender
        :param R_INT: the R_INT constant for the reward function
        :param R_COST: the R_COST constant for the reward function
        :param R_SLA: the R_SLA constant for the reward function
        :param R_ST: the R_ST constant for the reward function
        :param b1: the initial belief
        :param save_dir: the directory to save artefacts produced by the environment
        :param checkpoint_traces_freq: how frequently to checkpoint traces to disk
        :param gamma: the discount factor
        :param compute_beliefs: boolean flag indicating whether beliefs should be computed or not
        :param save_trace: boolean flag indicating whether traces should be saved
        """
        super().__init__()
        self.T = T
        self.O = O
        self.Z = Z
        self.R = R
        self.S = S
        self.L = L
        self.R_INT = R_INT
        self.R_COST = R_COST
        self.R_SLA = R_SLA
        self.R_ST = R_ST
        self.A1 = A1
        self.A2 = A2
        self.b1 = b1
        self.save_dir = save_dir
        self.env_name = env_name
        self.checkpoint_traces_freq = checkpoint_traces_freq
        self.gamma = gamma
        self.compute_beliefs = compute_beliefs
        self.save_trace = save_trace

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the object to a dict representation

        :return: a dict representation of the object
        """
        d: Dict[str, Any] = {}
        # ndarray.tolist() already yields (nested) plain Python lists
        d["T"] = self.T.tolist()
        d["O"] = self.O.tolist()
        d["Z"] = self.Z.tolist()
        d["R"] = self.R.tolist()
        d["S"] = self.S.tolist()
        d["L"] = self.L
        d["R_INT"] = self.R_INT
        d["R_COST"] = self.R_COST
        d["R_SLA"] = self.R_SLA
        d["R_ST"] = self.R_ST
        d["A1"] = self.A1.tolist()
        d["A2"] = self.A2.tolist()
        d["b1"] = self.b1.tolist()
        d["save_dir"] = self.save_dir
        d["env_name"] = self.env_name
        d["checkpoint_traces_freq"] = self.checkpoint_traces_freq
        d["gamma"] = self.gamma
        d["compute_beliefs"] = self.compute_beliefs
        d["save_trace"] = self.save_trace
        return d

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "StoppingGameConfig":
        """
        Converts a dict representation to an instance

        The keys "gamma", "compute_beliefs" and "save_trace" are optional. A missing "gamma" falls back to
        the constructor default (1). Missing "compute_beliefs"/"save_trace" fall back to False, which differs
        from the constructor defaults (True); that fallback is kept unchanged for backward compatibility.

        :param d: the dict to convert
        :return: the created instance
        """
        obj = StoppingGameConfig(
            T=np.array(d["T"]), O=np.array(d["O"]), Z=np.array(d["Z"]), R=np.array(d["R"]), S=np.array(d["S"]),
            A1=np.array(d["A1"]), A2=np.array(d["A2"]), L=d["L"], R_INT=d["R_INT"],
            R_COST=d["R_COST"], R_SLA=d["R_SLA"], R_ST=d["R_ST"], b1=np.array(d["b1"]), save_dir=d["save_dir"],
            env_name=d["env_name"], checkpoint_traces_freq=d["checkpoint_traces_freq"],
            gamma=d.get("gamma", 1),
            compute_beliefs=d.get("compute_beliefs", False), save_trace=d.get("save_trace", False)
        )
        return obj

    def __str__(self) -> str:
        """
        :return: a string representation of the object
        """
        return f"T:{self.T}, O:{self.O}, Z:{self.Z}, R:{self.R}, S:{self.S}, A1:{self.A1}, A2:{self.A2}, L:{self.L}, " \
               f"R_INT:{self.R_INT}, R_COST:{self.R_COST}, R_SLA:{self.R_SLA}, R_ST:{self.R_ST}, b1:{self.b1}, " \
               f"save_dir: {self.save_dir}, env_name: {self.env_name}, " \
               f"checkpoint_traces_freq: {self.checkpoint_traces_freq}, gamma: {self.gamma}, " \
               f"compute_beliefs: {self.compute_beliefs}, save_trace: {self.save_trace}"

    def attacker_observation_space(self) -> gym.spaces.Box:
        """
        :return: the attacker's observation space (a 3-dimensional box with upper bounds [L, 1, 2])
        """
        return gym.spaces.Box(low=np.array([np.float64(0), np.float64(0), np.float64(0)]),
                              high=np.array([np.float64(self.L), np.float64(1), np.float64(2)]),
                              dtype=np.float64, shape=(3,))

    def defender_observation_space(self) -> gym.spaces.Box:
        """
        :return: the defender's observation space (a 2-dimensional box with upper bounds [L, 1])
        """
        return gym.spaces.Box(low=np.array([np.float64(0), np.float64(0)]),
                              high=np.array([np.float64(self.L), np.float64(1)]),
                              dtype=np.float64, shape=(2,))

    def attacker_action_space(self) -> gym.spaces.Discrete:
        """
        :return: the attacker's action space (one discrete action per entry of A2)
        """
        return gym.spaces.Discrete(len(self.A2))

    def defender_action_space(self) -> gym.spaces.Discrete:
        """
        :return: the defender's action space (one discrete action per entry of A1)
        """
        return gym.spaces.Discrete(len(self.A1))

    @staticmethod
    def from_json_file(json_file_path: str) -> "StoppingGameConfig":
        """
        Reads a json file and converts it to a DTO

        :param json_file_path: the json file path
        :return: the converted DTO
        """
        import io
        import json
        # JSON text is UTF-8; do not depend on the locale's default encoding
        with io.open(json_file_path, 'r', encoding="utf-8") as f:
            json_str = f.read()
        return StoppingGameConfig.from_dict(json.loads(json_str))
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from typing import Dict, Any
|
|
2
|
+
from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
|
|
3
|
+
from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig
|
|
4
|
+
from csle_common.dao.training.policy import Policy
|
|
5
|
+
from csle_common.dao.training.random_policy import RandomPolicy
|
|
6
|
+
from csle_common.dao.training.multi_threshold_stopping_policy import MultiThresholdStoppingPolicy
|
|
7
|
+
from csle_common.dao.training.ppo_policy import PPOPolicy
|
|
8
|
+
from csle_common.dao.training.tabular_policy import TabularPolicy
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class StoppingGameDefenderPomdpConfig(SimulationEnvInputConfig):
    """
    DTO class representing the configuration of the POMDP environment of the defender
    when facing a static attacker policy
    """

    def __init__(self, env_name: str, stopping_game_config: StoppingGameConfig, attacker_strategy: Policy,
                 stopping_game_name: str = "csle-stopping-game-v1") -> None:
        """
        Initializes the DTO

        :param env_name: the environment name
        :param stopping_game_config: the underlying stopping game config
        :param attacker_strategy: the static attacker strategy (a policy object)
        :param stopping_game_name: the name of the underlying stopping game
        """
        super().__init__()
        self.env_name = env_name
        self.stopping_game_config = stopping_game_config
        self.attacker_strategy = attacker_strategy
        self.stopping_game_name = stopping_game_name

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "StoppingGameDefenderPomdpConfig":
        """
        Converts a dict representation to an instance

        The serialized attacker strategy is parsed by trying the policy classes in a fixed order:
        MultiThresholdStoppingPolicy, RandomPolicy, PPOPolicy and finally TabularPolicy. If the last
        candidate also fails, its exception propagates to the caller.

        :param d: the dict to convert
        :return: the created instance
        """
        serialized_strategy = d["attacker_strategy"]
        attacker_strategy: Policy
        for policy_class in (MultiThresholdStoppingPolicy, RandomPolicy, PPOPolicy):
            try:
                attacker_strategy = policy_class.from_dict(serialized_strategy)
                break
            except Exception:
                # This class could not parse the dict; fall through to the next candidate
                continue
        else:
            # Last resort: a failure here is deliberately not swallowed
            attacker_strategy = TabularPolicy.from_dict(serialized_strategy)

        obj = StoppingGameDefenderPomdpConfig(
            stopping_game_config=StoppingGameConfig.from_dict(d["stopping_game_config"]),
            attacker_strategy=attacker_strategy, stopping_game_name=d["stopping_game_name"],
            env_name=d["env_name"]
        )
        return obj

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the object to a dict representation

        :return: a dict representation of the object
        """
        d: Dict[str, Any] = {}
        d["stopping_game_config"] = self.stopping_game_config.to_dict()
        d["attacker_strategy"] = self.attacker_strategy.to_dict()
        d["stopping_game_name"] = self.stopping_game_name
        d["env_name"] = self.env_name
        return d

    def __str__(self) -> str:
        """
        :return: a string representation of the object
        """
        # Every field is separated by ", " (the separator before env_name previously lacked the space)
        return f"stopping_game_config: {self.stopping_game_config}, " \
               f"attacker_strategy: {self.attacker_strategy}, stopping_game_name: {self.stopping_game_name}, " \
               f"env_name: {self.env_name}"

    @staticmethod
    def from_json_file(json_file_path: str) -> "StoppingGameDefenderPomdpConfig":
        """
        Reads a json file and converts it to a DTO

        :param json_file_path: the json file path
        :return: the converted DTO
        """
        import io
        import json
        # JSON text is UTF-8; do not depend on the locale's default encoding
        with io.open(json_file_path, 'r', encoding="utf-8") as f:
            json_str = f.read()
        return StoppingGameDefenderPomdpConfig.from_dict(json.loads(json_str))
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from typing import Dict, Any
|
|
2
|
+
import numpy as np
|
|
3
|
+
import numpy.typing as npt
|
|
4
|
+
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
|
|
5
|
+
from csle_base.json_serializable import JSONSerializable
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StoppingGameState(JSONSerializable):
    """
    Represents the state of the optimal stopping game
    """

    def __init__(self, b1: npt.NDArray[np.float64], L: int) -> None:
        """
        Initializes the state

        :param b1: the initial belief
        :param L: the maximum number of stop actions of the defender
        """
        self.L = L
        self.b1 = b1
        # Work on a copy so that updates to the current belief never modify the initial belief
        self.b = self.b1.copy()
        # The stop counter starts at the maximum L
        self.l = self.L
        self.s = StoppingGameUtil.sample_initial_state(b1=self.b1)
        self.t = 1

    def reset(self) -> None:
        """
        Resets the state: restores the stop counter and the time step, samples a new initial state
        and restores the belief to a copy of the initial belief

        :return: None
        """
        self.l = self.L
        self.t = 1
        self.s = StoppingGameUtil.sample_initial_state(b1=self.b1)
        self.b = self.b1.copy()

    def attacker_observation(self) -> npt.NDArray[Any]:
        """
        :return: the attacker's observation, the array [l, b[1], s]
        """
        return np.array([np.float64(self.l), np.float64(self.b[1]), np.float64(self.s)])

    def defender_observation(self) -> npt.NDArray[Any]:
        """
        :return: the defender's observation, the array [l, b[1]]
        """
        return np.array([np.float64(self.l), np.float64(self.b[1])])

    def __str__(self) -> str:
        """
        :return: a string representation of the object
        """
        return f"s:{self.s}, L:{self.L}, l: {self.l}, b:{self.b}, b1:{self.b1}, t:{self.t}"

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "StoppingGameState":
        """
        Converts a dict representation to an instance

        :param d: the dict to convert
        :return: the created instance
        """
        obj = StoppingGameState(b1=np.array(d["b1"]), L=d["L"])
        # The constructor samples a fresh state; overwrite it with the serialized values
        obj.t = d["t"]
        obj.l = d["l"]
        obj.s = d["s"]
        obj.b = np.array(d["b"])
        return obj

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the object to a dict representation

        :return: a dict representation of the object
        """
        d: Dict[str, Any] = {}
        d["L"] = self.L
        # tolist() converts the elements to plain Python numbers; list(ndarray) would keep NumPy
        # scalars, which the json module cannot serialize for every dtype
        d["b1"] = np.asarray(self.b1).tolist()
        d["b"] = np.asarray(self.b).tolist()
        d["l"] = self.l
        d["s"] = self.s
        d["t"] = self.t
        return d

    @staticmethod
    def from_json_file(json_file_path: str) -> "StoppingGameState":
        """
        Reads a json file and converts it to a DTO

        :param json_file_path: the json file path
        :return: the converted DTO
        """
        import io
        import json
        # JSON text is UTF-8; do not depend on the locale's default encoding
        with io.open(json_file_path, 'r', encoding="utf-8") as f:
            json_str = f.read()
        return StoppingGameState.from_dict(json.loads(json_str))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
|