gym-csle-stopping-game 0.9.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_csle_stopping_game/__init__.py +23 -0
- gym_csle_stopping_game/__version__.py +1 -0
- gym_csle_stopping_game/constants/__init__.py +0 -0
- gym_csle_stopping_game/constants/constants.py +40 -0
- gym_csle_stopping_game/dao/__init__.py +0 -0
- gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py +86 -0
- gym_csle_stopping_game/dao/stopping_game_config.py +165 -0
- gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py +92 -0
- gym_csle_stopping_game/dao/stopping_game_state.py +98 -0
- gym_csle_stopping_game/envs/__init__.py +1 -0
- gym_csle_stopping_game/envs/stopping_game_env.py +393 -0
- gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py +282 -0
- gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py +233 -0
- gym_csle_stopping_game/util/__init__.py +0 -0
- gym_csle_stopping_game/util/stopping_game_util.py +699 -0
- gym_csle_stopping_game-0.9.24.dist-info/METADATA +414 -0
- gym_csle_stopping_game-0.9.24.dist-info/RECORD +19 -0
- gym_csle_stopping_game-0.9.24.dist-info/WHEEL +5 -0
- gym_csle_stopping_game-0.9.24.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
from typing import Tuple, List, Dict, Any, Union
|
|
2
|
+
import numpy as np
|
|
3
|
+
import numpy.typing as npt
|
|
4
|
+
from csle_common.dao.simulation_config.base_env import BaseEnv
|
|
5
|
+
from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
|
|
6
|
+
from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
|
|
7
|
+
from csle_common.dao.emulation_config.emulation_trace import EmulationTrace
|
|
8
|
+
from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
|
|
9
|
+
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class StoppingGamePomdpDefenderEnv(BaseEnv):
    """
    OpenAI Gym Env for the POMDP of the defender when facing a static attacker.

    The environment wraps a StoppingGameEnv and plays the attacker's part with the static strategy
    given in the configuration, so that callers only supply defender actions.
    """

    def __init__(self, config: StoppingGameDefenderPomdpConfig):
        """
        Initializes the environment

        :param config: the environment configuration; it provides the configuration of the wrapped
                       stopping game (config.stopping_game_config) and the strategy of the static
                       attacker (config.attacker_strategy)
        """
        self.config = config
        self.stopping_game_env = StoppingGameEnv(config=self.config.stopping_game_config)

        # Setup spaces
        self.observation_space = self.config.stopping_game_config.defender_observation_space()
        self.action_space = self.config.stopping_game_config.defender_action_space()

        # Setup static attacker strategy
        self.static_attacker_strategy = self.config.attacker_strategy

        # Setup Config
        self.viewer: Union[None, Any] = None
        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': 50  # Video rendering speed
        }

        self.latest_attacker_obs: Union[None, npt.NDArray[Any]] = None
        # Reset; this populates latest_attacker_obs, which step() and step_trace() read
        self.reset()
        super().__init__()

    def step(self, a1: int) -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
        """
        Takes a step in the environment by executing the given action

        :param a1: defender action
        :return: (obs, reward, terminated, truncated, info)
        """
        # Get attacker action from static strategy
        pi2 = np.array(self.static_attacker_strategy.stage_policy(self.latest_attacker_obs))
        a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)

        # Step the game
        o, r, d, _, info = self.stopping_game_env.step((a1, (pi2, a2)))
        self.latest_attacker_obs = o[1]
        defender_obs = o[0]

        # NOTE: the fourth value returned by the wrapped environment is discarded and the done flag
        # is reported as both "terminated" and "truncated"; kept as-is for backward compatibility.
        return defender_obs, r[0], d, d, info

    def reset(self, seed: Union[None, int] = None, soft: bool = False, options: Union[Dict[str, Any], None] = None) \
            -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
        """
        Resets the environment state, this should be called whenever step() returns <done>

        :param seed: the random seed (accepted for API compatibility, not forwarded to the wrapped environment)
        :param soft: boolean flag indicating whether it is a soft reset or not (currently not used)
        :param options: optional configuration parameters (currently not used)
        :return: the initial defender observation and the info dict of the wrapped environment
        """
        o, info = self.stopping_game_env.reset()
        self.latest_attacker_obs = o[1]
        defender_obs = o[0]
        return defender_obs, info

    def render(self, mode: str = 'human'):
        """
        Rendering is not supported by this environment

        :param mode: the rendering mode (ignored)
        :raises NotImplementedError: always
        """
        raise NotImplementedError("Rendering is not implemented for this environment")

    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
        """
        Utility method for stepping a pre-recorded trace

        :param trace: the trace to step
        :param a1: the action to step with
        :return: the result of the step according to the trace (defender obs, defender reward, done, info)
        """
        pi2 = np.array(self.static_attacker_strategy.stage_policy(self.latest_attacker_obs))
        o, r, d, info = self.stopping_game_env.step_trace(trace=trace, a1=a1, pi2=pi2)
        self.latest_attacker_obs = o[1]
        defender_obs = o[0]
        return defender_obs, r[0], d, info

    def is_defense_action_legal(self, defense_action_id: int) -> bool:
        """
        Checks whether a defender action in the environment is legal or not

        :param defense_action_id: the id of the action
        :return: True or False (always True in this environment)
        """
        return True

    def is_attack_action_legal(self, attack_action_id: int) -> bool:
        """
        Checks whether an attacker action in the environment is legal or not

        :param attack_action_id: the id of the attacker action
        :return: True or False (always True in this environment)
        """
        return True

    def get_traces(self) -> List[SimulationTrace]:
        """
        :return: the list of simulation traces
        """
        return self.stopping_game_env.get_traces()

    def reset_traces(self) -> None:
        """
        Resets the list of traces

        :return: None
        """
        return self.stopping_game_env.reset_traces()

    def set_model(self, model) -> None:
        """
        Sets the model. Useful when using RL frameworks where the stage policy is not easy to extract

        :param model: the model
        :return: None
        """
        self.model = model

    def set_state(self, state: Any) -> None:
        """
        Sets the state. Allows to simulate samples from specific states

        :param state: the state
        :return: None
        """
        self.stopping_game_env.set_state(state=state)

    def get_observation_from_history(self, history: List[int]) -> List[Any]:
        """
        Utility function to get a defender observation (belief) from a history

        :param history: the history to get the observation from
        :return: the observation
        """
        # The attacker's stage policy is evaluated at o=0 and L is taken from the game config
        return self.stopping_game_env.get_observation_from_history(
            history=history, pi2=self.static_attacker_strategy.stage_policy(o=0),
            l=self.config.stopping_game_config.L)

    def is_state_terminal(self, state: Any) -> bool:
        """
        Utility function to check whether a state is terminal or not

        :param state: the state
        :return: True if the state is terminal, otherwise False
        """
        return self.stopping_game_env.is_state_terminal(state=state)

    def add_observation_vector(self, obs_vector: List[Any], obs_id: int) -> None:
        """
        Adds an observation vector to the history (no-op in this environment)

        :param obs_vector: the observation vector to add
        :param obs_id: the id of the observation
        :return: None
        """
        pass

    def generate_random_particles(self, o: int, num_particles: int) -> List[int]:
        """
        Generates a random list of state particles from a given observation

        :param o: the latest observation
        :param num_particles: the number of particles to generate
        :return: the list of random particles
        """
        return self.stopping_game_env.generate_random_particles(o=o, num_particles=num_particles)

    def get_actions_from_particles(self, particles: List[int], t: int, observation: int,
                                   verbose: bool = False) -> List[int]:
        """
        Prunes the set of actions based on the current particle set.
        No pruning is performed in this environment: the full defender action set is returned.

        :param particles: the set of particles
        :param t: the current time step
        :param observation: the latest observation
        :param verbose: boolean flag indicating whether logging should be verbose or not
        :return: the list of pruned actions
        """
        return list(self.config.stopping_game_config.A1)

    def manual_play(self) -> None:
        """
        An interactive loop to test the environment manually.

        Reads commands from standard input until input() raises (e.g. EOF or a keyboard interrupt).
        Input that is neither a known command nor an integer action id is reported and ignored.

        :return: None
        """
        done = False
        while True:
            command = input("> ").strip()
            if command == "help":
                print("Enter an action id to execute the action, "
                      "press R to reset, "
                      "press S to print the state, press A to print the actions, "
                      "press D to check if done, "
                      "press H to print the history of actions")
            elif command == "A":
                print(f"Action space: {self.action_space}")
            elif command == "S":
                print(self.stopping_game_env.state)
            elif command == "D":
                print(done)
            elif command == "H":
                print(self.stopping_game_env.trace)
            elif command == "R":
                print("Resetting the state")
                self.reset()
                # A fresh episode is not done; without this "D" would report the previous episode
                done = False
            else:
                try:
                    action_idx = int(command)
                except ValueError:
                    # Do not abort the interactive session because of a typo or an empty line
                    print(f"Invalid input: '{command}', type 'help' to list the available commands")
                    continue
                _, _, done, _, _ = self.step(a1=action_idx)
|
|
File without changes
|