gym-csle-stopping-game 0.9.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,233 @@
1
+ from typing import Tuple, List, Dict, Any, Union
2
+ import numpy as np
3
+ import numpy.typing as npt
4
+ from csle_common.dao.simulation_config.base_env import BaseEnv
5
+ from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig
6
+ from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
7
+ from csle_common.dao.emulation_config.emulation_trace import EmulationTrace
8
+ from gym_csle_stopping_game.envs.stopping_game_env import StoppingGameEnv
9
+ from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil
10
+
11
+
12
class StoppingGamePomdpDefenderEnv(BaseEnv):
    """
    OpenAI Gym Env for the POMDP of the defender when facing a static attacker.

    Wraps the two-player StoppingGameEnv: the attacker's moves are sampled
    from a fixed (static) strategy taken from the configuration, so the
    wrapped environment exposes a single-agent (defender) interface.
    """

    def __init__(self, config: StoppingGameDefenderPomdpConfig) -> None:
        """
        Initializes the environment

        :param config: the environment configuration (includes the stopping game
                       configuration and the static attacker strategy)
        """
        self.config = config
        self.stopping_game_env = StoppingGameEnv(config=self.config.stopping_game_config)

        # Setup spaces (defender's view of the underlying stopping game)
        self.observation_space = self.config.stopping_game_config.defender_observation_space()
        self.action_space = self.config.stopping_game_config.defender_action_space()

        # Setup static attacker strategy
        self.static_attacker_strategy = self.config.attacker_strategy

        # Setup Config
        self.viewer: Union[None, Any] = None
        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': 50  # Video rendering speed
        }

        # Model is optionally installed later via set_model(); initialize it here
        # so that reads of self.model never raise AttributeError.
        self.model: Union[None, Any] = None
        # Latest observation of the attacker; fed to the static attacker strategy
        # to compute its stage policy on the next step.
        self.latest_attacker_obs: Union[None, npt.NDArray[Any]] = None
        # Reset
        self.reset()
        super().__init__()

    def step(self, a1: int) -> Tuple[npt.NDArray[Any], int, bool, bool, Dict[str, Any]]:
        """
        Takes a step in the environment by executing the given action

        :param a1: defender action
        :return: (obs, reward, terminated, truncated, info)
        """
        # Get attacker action from static strategy
        pi2 = np.array(self.static_attacker_strategy.stage_policy(self.latest_attacker_obs))
        a2 = StoppingGameUtil.sample_attacker_action(pi2=pi2, s=self.stopping_game_env.state.s)
        # Step the game
        o, r, d, _, info = self.stopping_game_env.step((a1, (pi2, a2)))
        self.latest_attacker_obs = o[1]
        defender_obs = o[0]

        # The underlying game exposes a single done flag; report it as both
        # terminated and truncated to satisfy the Gymnasium step API.
        return defender_obs, r[0], d, d, info

    def reset(self, seed: Union[None, int] = None, soft: bool = False, options: Union[Dict[str, Any], None] = None) \
            -> Tuple[npt.NDArray[Any], Dict[str, Any]]:
        """
        Resets the environment state, this should be called whenever step() returns <done>

        :param seed: the random seed (currently unused; the underlying game env is reset without it)
        :param soft: boolean flag indicating whether it is a soft reset or not (currently unused)
        :param options: optional configuration parameters (currently unused)
        :return: initial observation and info dict
        """
        o, info = self.stopping_game_env.reset()
        self.latest_attacker_obs = o[1]
        defender_obs = o[0]
        return defender_obs, info

    def render(self, mode: str = 'human'):
        """
        Renders the environment. Supported rendering modes: (1) human; and (2) rgb_array

        :param mode: the rendering mode
        :return: True (if human mode) otherwise an rgb array
        :raises NotImplementedError: always; rendering is not supported
        """
        raise NotImplementedError("Rendering is not implemented for this environment")

    def step_trace(self, trace: EmulationTrace, a1: int) -> Tuple[npt.NDArray[Any], int, bool, Dict[str, Any]]:
        """
        Utility method for stepping a pre-recorded trace

        :param trace: the trace to step
        :param a1: the action to step with
        :return: the result of the step according to the trace
        """
        pi2 = np.array(self.static_attacker_strategy.stage_policy(self.latest_attacker_obs))
        o, r, d, info = self.stopping_game_env.step_trace(trace=trace, a1=a1, pi2=pi2)
        self.latest_attacker_obs = o[1]
        defender_obs = o[0]
        return defender_obs, r[0], d, info

    def is_defense_action_legal(self, defense_action_id: int) -> bool:
        """
        Checks whether a defender action in the environment is legal or not

        :param defense_action_id: the id of the action
        :return: True or False (all defender actions are legal in this environment)
        """
        return True

    def is_attack_action_legal(self, attack_action_id: int) -> bool:
        """
        Checks whether an attacker action in the environment is legal or not

        :param attack_action_id: the id of the attacker action
        :return: True or False (all attacker actions are legal in this environment)
        """
        return True

    def get_traces(self) -> List[SimulationTrace]:
        """
        :return: the list of simulation traces
        """
        return self.stopping_game_env.get_traces()

    def reset_traces(self) -> None:
        """
        Resets the list of traces

        :return: None
        """
        return self.stopping_game_env.reset_traces()

    def set_model(self, model) -> None:
        """
        Sets the model. Useful when using RL frameworks where the stage policy is not easy to extract

        :param model: the model
        :return: None
        """
        self.model = model

    def set_state(self, state: Any) -> None:
        """
        Sets the state. Allows to simulate samples from specific states

        :param state: the state
        :return: None
        """
        self.stopping_game_env.set_state(state=state)

    def get_observation_from_history(self, history: List[int]) -> List[Any]:
        """
        Utility function to get a defender observation (belief) from a history

        :param history: the history to get the observation from
        :return: the observation
        """
        num_stops = self.config.stopping_game_config.L
        return self.stopping_game_env.get_observation_from_history(
            history=history, pi2=self.static_attacker_strategy.stage_policy(o=0), l=num_stops)

    def is_state_terminal(self, state: Any) -> bool:
        """
        Utility function to check whether a state is terminal or not

        :param state: the state
        :return: True if the state is terminal, else False
        """
        return self.stopping_game_env.is_state_terminal(state=state)

    def add_observation_vector(self, obs_vector: List[Any], obs_id: int) -> None:
        """
        Adds an observation vector to the history (no-op in this environment)

        :param obs_vector: the observation vector to add
        :param obs_id: the id of the observation
        :return: None
        """
        pass

    def generate_random_particles(self, o: int, num_particles: int) -> List[int]:
        """
        Generates a random list of state particles from a given observation

        :param o: the latest observation
        :param num_particles: the number of particles to generate
        :return: the list of random particles
        """
        return self.stopping_game_env.generate_random_particles(o=o, num_particles=num_particles)

    def get_actions_from_particles(self, particles: List[int], t: int, observation: int,
                                   verbose: bool = False) -> List[int]:
        """
        Prunes the set of actions based on the current particle set
        (no pruning is performed here; all defender actions are returned)

        :param particles: the set of particles
        :param t: the current time step
        :param observation: the latest observation
        :param verbose: boolean flag indicating whether logging should be verbose or not
        :return: the list of pruned actions
        """
        return list(self.config.stopping_game_config.A1)

    def manual_play(self) -> None:
        """
        An interactive loop to test the environment manually

        :return: None
        """
        done = False
        while True:
            raw_input = input("> ")
            raw_input = raw_input.strip()
            if raw_input == "help":
                # Separators added between the concatenated literals so the
                # help text does not render as e.g. "reset,press S".
                print("Enter an action id to execute the action, "
                      "press R to reset, "
                      "press S to print the state, press A to print the actions, "
                      "press D to check if done, "
                      "press H to print the history of actions")
            elif raw_input == "A":
                print(f"Action space: {self.action_space}")
            elif raw_input == "S":
                print(self.stopping_game_env.state)
            elif raw_input == "D":
                print(done)
            elif raw_input == "H":
                print(self.stopping_game_env.trace)
            elif raw_input == "R":
                print("Resetting the state")
                self.reset()
            else:
                # Robustness fix: an unrecognized non-integer command previously
                # raised ValueError and crashed the interactive loop.
                try:
                    action_idx = int(raw_input)
                except ValueError:
                    print(f"Unrecognized command: {raw_input}, type 'help' for the list of commands")
                    continue
                _, _, done, _, _ = self.step(a1=action_idx)
File without changes