gym-csle-stopping-game 0.7.2__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of gym-csle-stopping-game might be problematic.

Files changed (34)
  1. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/PKG-INFO +6 -6
  2. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/pyproject.toml +5 -5
  3. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/setup.cfg +5 -5
  4. gym_csle_stopping_game-0.7.3/src/gym_csle_stopping_game/__version__.py +1 -0
  5. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/constants/constants.py +1 -0
  6. gym_csle_stopping_game-0.7.3/src/gym_csle_stopping_game/util/stopping_game_util.py +698 -0
  7. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game.egg-info/PKG-INFO +6 -6
  8. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game.egg-info/requires.txt +5 -5
  9. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/tests/test_stopping_game_env.py +2 -2
  10. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/tests/test_stopping_game_mdp_attacker_env.py +1 -1
  11. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/tests/test_stopping_game_pomdp_defender_env.py +1 -1
  12. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/tests/test_stopping_game_util.py +2 -2
  13. gym_csle_stopping_game-0.7.2/src/gym_csle_stopping_game/__version__.py +0 -1
  14. gym_csle_stopping_game-0.7.2/src/gym_csle_stopping_game/util/stopping_game_util.py +0 -388
  15. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/LICENSE.md +0 -0
  16. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/README.md +0 -0
  17. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/setup.py +0 -0
  18. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/__init__.py +0 -0
  19. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/constants/__init__.py +0 -0
  20. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/dao/__init__.py +0 -0
  21. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py +0 -0
  22. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/dao/stopping_game_config.py +0 -0
  23. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/dao/stopping_game_defender_pomdp_config.py +0 -0
  24. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/dao/stopping_game_state.py +0 -0
  25. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/envs/__init__.py +0 -0
  26. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/envs/stopping_game_env.py +0 -0
  27. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py +0 -0
  28. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py +0 -0
  29. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game/util/__init__.py +0 -0
  30. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game.egg-info/SOURCES.txt +0 -0
  31. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game.egg-info/dependency_links.txt +0 -0
  32. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game.egg-info/not-zip-safe +0 -0
  33. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/src/gym_csle_stopping_game.egg-info/top_level.txt +0 -0
  34. {gym_csle_stopping_game-0.7.2 → gym_csle_stopping_game-0.7.3}/tests/test_stopping_game_dao.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-csle-stopping-game
- Version: 0.7.2
+ Version: 0.7.3
  Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
  Author: Kim Hammar
  Author-email: hammar.kim@gmail.com
@@ -204,11 +204,11 @@ Classifier: Intended Audience :: Science/Research
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  Requires-Dist: gymnasium>=0.27.1
- Requires-Dist: csle-base>=0.7.2
- Requires-Dist: csle-common>=0.7.2
- Requires-Dist: csle-attacker>=0.7.2
- Requires-Dist: csle-defender>=0.7.2
- Requires-Dist: csle-collector>=0.7.2
+ Requires-Dist: csle-base>=0.7.3
+ Requires-Dist: csle-common>=0.7.3
+ Requires-Dist: csle-attacker>=0.7.3
+ Requires-Dist: csle-defender>=0.7.3
+ Requires-Dist: csle-collector>=0.7.3
  Provides-Extra: test
  Requires-Dist: pytest>=6.0; extra == "test"
  Requires-Dist: pytest-cov>=2.0; extra == "test"
@@ -19,11 +19,11 @@ classifiers = [
  ]
  dependencies = [
  "gymnasium>=0.27.1",
- "csle-base>=0.7.2",
- "csle-common>=0.7.2",
- "csle-attacker>=0.7.2",
- "csle-defender>=0.7.2",
- "csle-collector>=0.7.2"
+ "csle-base>=0.7.3",
+ "csle-common>=0.7.3",
+ "csle-attacker>=0.7.3",
+ "csle-defender>=0.7.3",
+ "csle-collector>=0.7.3"
  ]

  [project.optional-dependencies]
@@ -20,11 +20,11 @@ classifiers =
  [options]
  install_requires =
  gymnasium>=0.27.1
- csle-base>=0.7.2
- csle-common>=0.7.2
- csle-attacker>=0.7.2
- csle-defender>=0.7.2
- csle-collector>=0.7.2
+ csle-base>=0.7.3
+ csle-common>=0.7.3
+ csle-attacker>=0.7.3
+ csle-defender>=0.7.3
+ csle-collector>=0.7.3
  python_requires = >=3.8
  package_dir =
  =src
@@ -0,0 +1 @@
+ __version__ = '0.7.3'
@@ -34,6 +34,7 @@ class ENV_METRICS:
  DEFENDER_ACTION = "a1"
  ATTACKER_ACTION = "a2"
  OBSERVATION = "o"
+ BELIEF = "b"
  TIME_STEP = "t"
  AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN = "average_defender_baseline_stop_on_first_alert_return"
  AVERAGE_UPPER_BOUND_RETURN = "average_upper_bound_return"
@@ -0,0 +1,698 @@
+ from typing import Any, Tuple
+ import itertools
+ import numpy as np
+ import numpy.typing as npt
+ from scipy.stats import betabinom
+ from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
+ from csle_common.dao.training.policy import Policy
+
+
+ class StoppingGameUtil:
+ """
+ Class with utility functions for the StoppingGame Environment
+ """
+
+ @staticmethod
+ def b1() -> npt.NDArray[np.float64]:
+ """
+ Gets the initial belief
+
+ :return: the initial belief
+ """
+ return np.array([1.0, 0.0, 0.0])
+
+ @staticmethod
+ def state_space():
+ """
+ Gets the state space
+
+ :return: the state space of the game
+ """
+ return np.array([0, 1, 2])
+
+ @staticmethod
+ def defender_actions() -> npt.NDArray[np.int_]:
+ """
+ Gets the action space of the defender
+
+ :return: the action space of the defender
+ """
+ return np.array([0, 1])
+
+ @staticmethod
+ def attacker_actions() -> npt.NDArray[np.int_]:
+ """
+ Gets the action space of the attacker
+
+ :return: the action space of the attacker
+ """
+ return np.array([0, 1])
+
+ @staticmethod
+ def observation_space(n):
+ """
+ Returns the observation space of size n
+
+ :param n: the maximum observation
+ :return: the observation space
+ """
+ return np.array(list(range(n + 1)))
+
+ @staticmethod
+ def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> npt.NDArray[Any]:
+ """
+ Gets the reward tensor
+
+ :param R_SLA: the R_SLA constant
+ :param R_INT: the R_INT constant
+ :param R_COST: the R_COST constant
+ :param R_ST: the R_ST constant
+ :return: a |L|x|A1|x|A2|x|S| tensor
+ """
+
+ R_l = []
+ for l in range(1, L + 1):
+ R = [
+ # Defender continues
+ [
+ # Attacker continues
+ [R_SLA, R_SLA + R_INT, 0],
+ # Attacker stops
+ [R_SLA, R_SLA, 0]
+ ],
+ # Defender stops
+ [
+ # Attacker continues
+ [R_COST / l, R_ST / l, 0],
+ # Attacker stops
+ [R_COST / l, R_SLA, 0]
+ ]
+ ]
+ R_l.append(R)
+ return np.array(R_l)
+
+ @staticmethod
+ def transition_tensor(L: int) -> npt.NDArray[Any]:
+ """
+ Gets the transition tensor
+
+ :param L: the maximum number of stop actions
+ :return: a |L|x|A1|x|A2||S|^2 tensor
+ """
+ T_l = []
+ for l in range(1, L + 1):
+ if l == 1:
+ T = [
+ # Defender continues
+ [
+ # Attacker continues
+ [
+ [1.0, 0.0, 0.0], # No intrusion
+ [0.0, 1.0, 0.0], # Intrusion
+ [0.0, 0.0, 1.0] # Terminal
+ ],
+ # Attacker stops
+ [
+ [0.0, 1.0, 0.0], # No intrusion
+ [0.0, 0.0, 1.0], # Intrusion
+ [0.0, 0.0, 1.0] # Terminal
+ ]
+ ],
+
+ # Defender stops
+ [
+ # Attacker continues
+ [
+ [0.0, 0.0, 1.0], # No intrusion
+ [0.0, 0.0, 1.0], # Intrusion
+ [0.0, 0.0, 1.0] # Terminal
+ ],
+ # Attacker stops
+ [
+ [0.0, 0.0, 1.0], # No Intrusion
+ [0.0, 0.0, 1.0], # Intrusion
+ [0.0, 0.0, 1.0] # Terminal
+ ]
+ ]
+ ]
+ else:
+ T = [
+ # Defender continues
+ [
+ # Attacker continues
+ [
+ [1.0, 0.0, 0.0], # No intrusion
+ [0.0, 1.0 - 1.0 / (2.0 * l), 1.0 / (2.0 * l)], # Intrusion
+ [0.0, 0.0, 1.0] # Terminal
+ ],
+ # Attacker stops
+ [
+ [0.0, 1.0, 0.0], # No intrusion
+ [0.0, 0.0, 1.0], # Intrusion
+ [0.0, 0.0, 1.0] # Terminal
+ ]
+ ],
+
+ # Defender stops
+ [
+ # Attacker continues
+ [
+ [1.0, 0.0, 0.0], # No intrusion
+ [0.0, 1.0 - 1.0 / (2.0 * l), 1.0 / (2.0 * l)], # Intrusion
+ [0.0, 0.0, 1.0] # Terminal
+ ],
+ # Attacker stops
+ [
+ [0.0, 1.0, 0.0], # No Intrusion
+ [0.0, 0.0, 1.0], # Intrusion
+ [0.0, 0.0, 1.0] # Terminal
+ ]
+ ]
+ ]
+ T_l.append(T)
+ return np.array(T_l)
+
+ @staticmethod
+ def observation_tensor(n):
+ """
+ :return: a |A1|x|A2|x|S|x|O| tensor
+ """
+ intrusion_dist = []
+ no_intrusion_dist = []
+ terminal_dist = np.zeros(n + 1)
+ terminal_dist[-1] = 1
+ intrusion_rv = betabinom(n=n, a=1, b=0.7)
+ no_intrusion_rv = betabinom(n=n, a=0.7, b=3)
+ for i in range(n + 1):
+ intrusion_dist.append(intrusion_rv.pmf(i))
+ no_intrusion_dist.append(no_intrusion_rv.pmf(i))
+ Z = np.array(
+ [
+ [
+ [
+ no_intrusion_dist,
+ intrusion_dist,
+ terminal_dist
+ ],
+ [
+ no_intrusion_dist,
+ intrusion_dist,
+ terminal_dist
+ ],
+ ],
+ [
+ [
+ no_intrusion_dist,
+ intrusion_dist,
+ terminal_dist
+ ],
+ [
+ no_intrusion_dist,
+ intrusion_dist,
+ terminal_dist
+ ],
+ ]
+ ]
+ )
+ return Z
+
+ @staticmethod
+ def sample_next_state(T: npt.NDArray[Any], l: int, s: int, a1: int, a2: int, S: npt.NDArray[np.int_]) -> int:
+ """
+ Samples the next state
+
+ :param T: the transition operator
+ :param s: the currrent state
+ :param a1: the defender action
+ :param a2: the attacker action
+ :param S: the state space
+ :param l: the number of stops remaining
+ :return: s'
+ """
+ state_probs = []
+ for s_prime in S:
+ state_probs.append(T[l - 1][a1][a2][s][s_prime])
+ return int(np.random.choice(np.arange(0, len(S)), p=state_probs))
+
+ @staticmethod
+ def sample_initial_state(b1: npt.NDArray[np.float64]) -> int:
+ """
+ Samples the initial state
+
+ :param b1: the initial belief
+ :return: s1
+ """
+ return int(np.random.choice(np.arange(0, len(b1)), p=b1))
+
+ @staticmethod
+ def sample_next_observation(Z: npt.NDArray[Any], s_prime: int, O: npt.NDArray[np.int_]) -> int:
+ """
+ Samples the next observation
+
+ :param Z: observation tensor which include the observation probables
+ :param s_prime: the new state
+ :param O: the observation space
+ :return: o
+ """
+ observation_probs = []
+ for o in O:
+ if len(Z.shape) == 4:
+ observation_probs.append(Z[0][0][s_prime][o])
+ elif len(Z.shape) == 3:
+ observation_probs.append(Z[0][s_prime][o])
+ elif len(Z.shape) == 2:
+ observation_probs.append(Z[s_prime][o])
+ o = np.random.choice(np.arange(0, len(O)), p=observation_probs)
+ return int(o)
+
+ @staticmethod
+ def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float64], pi2: npt.NDArray[Any], l: int,
+ config: StoppingGameConfig) -> float:
+ """
+ A Bayesian filter to compute the belief of player 1
+ of being in s_prime when observing o after taking action a in belief b given that the opponent follows
+ strategy pi2
+
+ :param s_prime: the state to compute the belief of
+ :param o: the observation
+ :param a1: the action of player 1
+ :param b: the current belief point
+ :param pi2: the policy of player 2
+ :param l: stops remaining
+ :return: b_prime(s_prime)
+ """
+ l = l - 1
+ norm = 0
+ for s in config.S:
+ for a2 in config.A2:
+ for s_prime_1 in config.S:
+ prob_1 = config.Z[a1][a2][s_prime_1][o]
+ norm += b[s] * prob_1 * config.T[l][a1][a2][s][s_prime_1] * pi2[s][a2]
+ if norm == 0:
+ return 0
+ temp = 0
+
+ for s in config.S:
+ for a2 in config.A2:
+ temp += config.Z[a1][a2][s_prime][o] * config.T[l][a1][a2][s][s_prime] * b[s] * pi2[s][a2]
+ b_prime_s_prime = temp / norm
+ if round(b_prime_s_prime, 2) > 1:
+ print(f"b_prime_s_prime >= 1: {b_prime_s_prime}, a1:{a1}, s_prime:{s_prime}, l:{l}, o:{o}, pi2:{pi2}")
+ assert round(b_prime_s_prime, 2) <= 1
+ if s_prime == 2 and o != config.O[-1]:
+ assert round(b_prime_s_prime, 2) <= 0.01
+ return float(b_prime_s_prime)
+
+ @staticmethod
+ def next_belief(o: int, a1: int, b: npt.NDArray[np.float64], pi2: npt.NDArray[Any],
+ config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float64]:
+ """
+ Computes the next belief using a Bayesian filter
+
+ :param o: the latest observation
+ :param a1: the latest action of player 1
+ :param b: the current belief
+ :param pi2: the policy of player 2
+ :param config: the game config
+ :param l: stops remaining
+ :param a2: the attacker action (for debugging, should be consistent with pi2)
+ :param s: the true state (for debugging)
+ :return: the new belief
+ """
+ b_prime = np.zeros(len(config.S))
+ for s_prime in config.S:
+ b_prime[s_prime] = StoppingGameUtil.bayes_filter(s_prime=s_prime, o=o, a1=a1, b=b,
+ pi2=pi2, config=config, l=l)
+ if round(sum(b_prime), 2) != 1:
+ print(f"error, b_prime:{b_prime}, o:{o}, a1:{a1}, b:{b}, pi2:{pi2}, "
+ f"a2: {a2}, s:{s}")
+ assert round(sum(b_prime), 2) == 1
+ return b_prime
+
+ @staticmethod
+ def sample_attacker_action(pi2: npt.NDArray[Any], s: int) -> int:
+ """
+ Samples the attacker action
+
+ :param pi2: the attacker policy
+ :param s: the game state
+ :return: a2 is the attacker action
+ """
+ return int(np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s]))
+
+ @staticmethod
+ def pomdp_solver_file(config: StoppingGameConfig, discount_factor: float, pi2: npt.NDArray[Any]) -> str:
+ """
+ Gets the POMDP environment specification based on the format at http://www.pomdp.org/code/index.html,
+ for the defender's local problem against a static attacker
+
+ :param config: the POMDP config
+ :param discount_factor: the discount factor
+ :param pi2: the attacker strategy
+ :return: the file content as a string
+ """
+ file_str = ""
+ file_str = file_str + f"discount: {discount_factor}\n\n"
+ file_str = file_str + "values: reward\n\n"
+ file_str = file_str + f"states: {len(config.S)}\n\n"
+ file_str = file_str + f"actions: {len(config.A1)}\n\n"
+ file_str = file_str + f"observations: {len(config.O)}\n\n"
+ initial_belief_str = " ".join(list(map(lambda x: str(x), config.b1)))
+ file_str = file_str + f"start: {initial_belief_str}\n\n\n"
+ num_transitions = 0
+ for s in config.S:
+ for a1 in config.A1:
+ probs = []
+ for s_prime in range(len(config.S)):
+ num_transitions += 1
+ prob = 0
+ for a2 in config.A2:
+ prob += config.T[0][a1][a2][s][s_prime] * pi2[s][a2]
+ file_str = file_str + f"T: {a1} : {s} : {s_prime} {prob:.80f}\n"
+ probs.append(prob)
+ assert round(sum(probs), 3) == 1
+ file_str = file_str + "\n\n"
+ for a1 in config.A1:
+ for s_prime in config.S:
+ probs = []
+ for o in range(len(config.O)):
+ prob = config.Z[0][0][s_prime][o]
+ file_str = file_str + f"O : {a1} : {s_prime} : {o} {prob:.80f}\n"
+ probs.append(prob)
+ assert round(sum(probs), 3) == 1
+ file_str = file_str + "\n\n"
+ for s in config.S:
+ for a1 in config.A1:
+ for s_prime in config.S:
+ for o in config.O:
+ r = config.R[0][a1][0][s]
+ file_str = file_str + f"R: {a1} : {s} : {s_prime} : {o} {r:.80f}\n"
+ return file_str
+
+ @staticmethod
+ def reduce_T_attacker(T: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+ """
+ Reduces the transition tensor based on a given attacker strategy
+
+ :param T: the tensor to reduce
+ :param strategy: the strategy to use for the reduction
+ :return: the reduced tensor (|A1|x|S|x|S|)
+ """
+ if len(T.shape) == 5:
+ T = T[0]
+ reduced_T = np.zeros((T.shape[0], T.shape[2], T.shape[3]))
+ for i in range(T.shape[0]):
+ for j in range(T.shape[2]):
+ for k in range(T.shape[3]):
+ reduced_T[i][j][k] = T[i][0][j][k] * strategy.probability(j, 0) + T[i][1][j][
+ k] * strategy.probability(j, 1)
+ # if j == 0:
+ # reduced_T[i][j][k] = T[i][0][j][k] * strategy.probability(j, 0) + T[i][1][j][
+ # k] * strategy.probability(j, 1)
+ # else:
+ # reduced_T[i][j][k] = (T[i][0][j][k] * (1 - strategy.probability(j, 0)) + T[i][1][j][k] *
+ # strategy.probability(j, 1))
+ return reduced_T
+
+ @staticmethod
+ def reduce_R_attacker(R: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+ """
+ Reduces the reward tensor based on a given attacker strategy
+
+ :param R: the reward tensor to reduce
+ :param strategy: the strategy to use for the reduction
+ :return: the reduced reward tensor (|A1|x|S|)
+ """
+ if len(R.shape) == 4:
+ R = R[0]
+ reduced_R = np.zeros((R.shape[0], R.shape[2]))
+ for i in range(R.shape[0]):
+ for j in range(R.shape[2]):
+ reduced_R[i][j] = (R[i][0][j] * strategy.probability(j, 0) + R[i][1][j] *
+ strategy.probability(j, 1))
+ return reduced_R
+
+ @staticmethod
+ def reduce_Z_attacker(Z: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+ """
+ Reduces the observation tensor based on a given attacker strategy
+
+ :param Z: the observation tensor to reduce
+ :param strategy: the strategy to use for the reduction
+ :return: the reduced observation tensor (|A1|x|S|x|O|)
+ """
+ reduced_Z = np.zeros((Z.shape[0], Z.shape[2], Z.shape[3]))
+ for i in range(Z.shape[0]):
+ for j in range(Z.shape[2]):
+ for k in range(Z.shape[3]):
+ reduced_Z[i][j][k] = Z[i][0][j][k] * strategy.probability(j, 0) + Z[i][1][j][
+ k] * strategy.probability(j, 1)
+ return reduced_Z
+
+ @staticmethod
+ def reduce_T_defender(T: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+ """
+ Reduces the transition tensor based on a given defender strategy
+
+ :param T: the tensor to reduce
+ :param strategy: the strategy to use for the reduction
+ :return: the reduced tensor (|A2|x|S|x|S|)
+ """
+ if len(T.shape) == 5:
+ T = T[0]
+ reduced_T = np.zeros((T.shape[1], T.shape[2], T.shape[3]))
+ for i in range(T.shape[1]):
+ for j in range(T.shape[2]):
+ for k in range(T.shape[3]):
+ reduced_T[i][j][k] = (T[0][i][j][k] * strategy.probability(j, 0) + T[1][i][j][k]
+ * strategy.probability(j, 1))
+ return reduced_T
+
+ @staticmethod
+ def reduce_R_defender(R: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+ """
+ Reduces the reward tensor based on a given defender strategy
+
+ :param R: the reward tensor to reduce
+ :param strategy: the strategy to use for the reduction
+ :return: the reduced reward tensor (|A2|x|S|)
+ """
+ if len(R.shape) == 4:
+ R = R[0]
+ reduced_R = np.zeros((R.shape[1], R.shape[2]))
+ for i in range(R.shape[1]):
+ for j in range(R.shape[2]):
+ reduced_R[i][j] = (R[0][i][j] * strategy.probability(j, 0) + R[1][i][j] *
+ strategy.probability(j, 1))
+ return reduced_R
+
+ @staticmethod
+ def aggregate_belief_mdp_defender(aggregation_resolution: int, T: npt.NDArray[np.float_],
+ R: npt.NDArray[np.float_], Z: npt.NDArray[np.float_],
+ S: npt.NDArray[np.int_], A: npt.NDArray[np.int_], O: npt.NDArray[np.int_]) \
+ -> Tuple[npt.NDArray[np.float_], npt.NDArray[np.int_], npt.NDArray[np.float_], npt.NDArray[np.float_]]:
+ """
+ Generates an aggregate belief MDP from a given POMDP specification and aggregation resolution
+
+ :param aggregation_resolution: the belief aggregation resolution
+ :param T: the transition tensor of the POMDP
+ :param R: the reward tensor of the POMDP
+ :param Z: the observation tensor of the POMDP
+ :param S: the state space of the POMDP
+ :param A: the action space of the POMDP
+ :param O: the observation space of the POMDP
+ :return: the state space, action space, transition operator, and belief operator of the belief MDP
+ """
+ aggregate_belief_space = StoppingGameUtil.generate_aggregate_belief_space(
+ n=aggregation_resolution, belief_space_dimension=len(S))
+ belief_T = StoppingGameUtil.generate_aggregate_belief_transition_operator(
+ aggregate_belief_space=aggregate_belief_space, S=S, A=A, O=O, T=T, Z=Z)
+ belief_R = StoppingGameUtil.generate_aggregate_belief_reward_tensor(
+ aggregate_belief_space=aggregate_belief_space, S=S, A=A, R=R)
+ return aggregate_belief_space, A, belief_T, belief_R
+
+ @staticmethod
+ def generate_aggregate_belief_space(n: int, belief_space_dimension: int) -> npt.NDArray[np.float_]:
+ """
+ Generate an aggregate belief space B_n.
+
+ :param n: the aggregation resolution
+ :param belief_space_dimension: the belief space dimension
+ :return: the aggregate belief space
+ """
+
+ # Generate all combinations of integer allocations k_i such that sum(k_i) = n
+ combinations = [k for k in itertools.product(range(n + 1), repeat=belief_space_dimension) if sum(k) == n]
+
+ # Convert integer allocations to belief points by dividing each k_i by n
+ belief_points = [list(k_i / n for k_i in k) for k in combinations]
+
+ # Remove all beliefs that violate the stopping dynamics
+ belief_points = list(filter(lambda x: x[-1] == 1.0 or x[-1] == 0.0, belief_points))
+
+ return np.array(belief_points)
+
+ @staticmethod
+ def generate_aggregate_belief_reward_tensor(
+ aggregate_belief_space: npt.NDArray[np.float_], S: npt.NDArray[np.int_], A: npt.NDArray[np.int_],
+ R: npt.NDArray[np.float_]) -> npt.NDArray[np.float_]:
+ """
+ Generates an aggregate reward tensor for the aggregate belief MDP
+
+ :param aggregate_belief_space: the aggregate belief space
+ :param S: the state space of the POMDP
+ :param A: the action space of the POMDP
+ :param R: the reward tensor of the POMDP
+ :return: the reward tensor of the aggregate belief MDP
+ """
+ belief_R = np.zeros((len(A), len(aggregate_belief_space)))
+ belief_space_list = aggregate_belief_space.tolist()
+ for a in A:
+ for b in aggregate_belief_space:
+ expected_reward = 0
+ for s in S:
+ expected_reward += R[a][s] * b[s]
+ belief_R[a][belief_space_list.index(b.tolist())] = expected_reward
+ return belief_R
+
+ @staticmethod
+ def generate_aggregate_belief_transition_operator(
+ aggregate_belief_space: npt.NDArray[np.float_], S: npt.NDArray[np.int_], A: npt.NDArray[np.int_],
+ O: npt.NDArray[np.int_], T: npt.NDArray[np.float_], Z: npt.NDArray[np.float_]) -> npt.NDArray[np.float_]:
+ """
+ Generates an aggregate belief space transition operator
+
+ :param aggregate_belief_space: the aggregate belief space
+ :param O: the observation space of the POMDP
+ :param S: the state space of the POMDP
+ :param A: the action space of the POMDP
+ :param T: the transition operator of the POMDP
+ :param Z: the observation tensor of the POMDP
+ :return: the aggregate belief space operator
+ """
+ belief_space_list = aggregate_belief_space.tolist()
+ belief_T = np.zeros((len(A), len(aggregate_belief_space), len(aggregate_belief_space)))
+ for a in A:
+ for b1 in aggregate_belief_space:
+ for b2 in aggregate_belief_space:
+ belief_T[a][belief_space_list.index(b1.tolist())][belief_space_list.index(b2.tolist())] \
+ = StoppingGameUtil.aggregate_belief_transition_probability(
+ b1=b1, b2=b2, a=a, S=S, O=O, T=T, Z=Z, aggregate_belief_space=aggregate_belief_space, A=A)
+ return belief_T
+
+ @staticmethod
+ def aggregate_belief_transition_probability(b1: npt.NDArray[np.float_], b2: npt.NDArray[np.float_], a: int,
+ S: npt.NDArray[np.int_], O: npt.NDArray[np.int_],
+ A: npt.NDArray[np.int_],
+ T: npt.NDArray[np.float_], Z: npt.NDArray[np.float_],
+ aggregate_belief_space: npt.NDArray[np.float_]) -> float:
+ """
+ Calculates the probability of transitioning from belief b1 to belief b2 when taking action a
+
+ :param b1: the source belief
+ :param b2: the target belief
+ :param a: the action
+ :param S: the state space of the POMDP
+ :param O: the observation space of the POMDP
+ :param A: the action space of the POMDP
+ :param T: the transition operator
+ :param Z: the observation tensor
+ :param aggregate_belief_space: the aggregate belief space
+ :return: the probability P(b2 | b1, a)
+ """
+ prob = 0
+ for o in O:
+ if sum([Z[a][s_prime][o] * b1[s] * T[a][s][s_prime] for s in S for s_prime in S]) == 0:
+ continue
+ b_prime = StoppingGameUtil.pomdp_next_belief(
+ o=o, a=a, b=b1, states=S, observations=O, observation_tensor=Z, transition_tensor=T)
+ nearest_neighbor = StoppingGameUtil.find_nearest_neighbor_belief(belief_space=aggregate_belief_space,
+ target_belief=b_prime)
+ if np.array_equal(nearest_neighbor, b2):
+ for s in S:
+ for s_prime in S:
+ prob += Z[a][s_prime][o] * b1[s] * T[a][s][s_prime]
+ return prob
+
+ @staticmethod
+ def pomdp_next_belief(o: int, a: int, b: npt.NDArray[np.float64], states: npt.NDArray[np.int_],
+ observations: npt.NDArray[np.int_], observation_tensor: npt.NDArray[np.float_],
+ transition_tensor: npt.NDArray[np.float_]) \
+ -> npt.NDArray[np.float64]:
+ """
+ Computes the next belief of the POMDP using a Bayesian filter
+
+ :param o: the latest observation
+ :param a: the latest action of player 1
+ :param b: the current belief
+ :param states: the list of states
+ :param observations: the list of observations
+ :param observation_tensor: the observation tensor
+ :param transition_tensor: the transition tensor
+ :return: the new belief
+ """
+ b_prime = [0.0] * len(states)
+ for s_prime in states:
+ b_prime[s_prime] = StoppingGameUtil.pomdp_bayes_filter(
+ s_prime=s_prime, o=o, a=a, b=b, states=states, observations=observations,
+ transition_tensor=transition_tensor, observation_tensor=observation_tensor)
+ if round(sum(b_prime), 2) != 1:
+ print(f"error, b_prime:{b_prime}, o:{o}, a:{a}, b:{b}")
+ assert round(sum(b_prime), 2) == 1
+ return np.array(b_prime)
+
+ @staticmethod
+ def pomdp_bayes_filter(s_prime: int, o: int, a: int, b: npt.NDArray[np.float64], states: npt.NDArray[np.int_],
+ observations: npt.NDArray[np.int_], observation_tensor: npt.NDArray[np.float_],
+ transition_tensor: npt.NDArray[np.float_]) -> float:
+ """
+ A Bayesian filter to compute b[s_prime] of the POMDP
+
+ :param s_prime: the state to compute the belief for
+ :param o: the latest observation
+ :param a: the latest action
+ :param b: the current belief
+ :param states: the list of states
+ :param observations: the list of observations
+ :param observation_tensor: the observation tensor
+ :param transition_tensor: the transition tensor of the POMDP
+ :return: b[s_prime]
+ """
+ norm = 0.0
+ for s in states:
+ for s_prime_1 in states:
+ prob_1 = observation_tensor[a][s_prime_1][o]
+ norm += b[s] * prob_1 * transition_tensor[a][s][s_prime_1]
+ if norm == 0.0:
+ print(f"zero norm, a: {a}, b: {b}, o: {o}")
+ return 0.0
+ temp = 0.0
+
+ for s in states:
+ temp += observation_tensor[a][s_prime][o] * transition_tensor[a][s][s_prime] * b[s]
+ b_prime_s_prime = temp / norm
+ if round(b_prime_s_prime, 2) > 1:
+ print(f"b_prime_s_prime >= 1: {b_prime_s_prime}, a1:{a}, s_prime:{s_prime}")
+ assert round(b_prime_s_prime, 2) <= 1
+ if s_prime == 2 and o != observations[-1]:
+ assert round(b_prime_s_prime, 2) <= 0.01
+ return b_prime_s_prime
+
+ @staticmethod
+ def find_nearest_neighbor_belief(belief_space: npt.NDArray[np.float_], target_belief: npt.NDArray[np.float_]) \
+ -> npt.NDArray[np.float_]:
+ """
+ Finds the nearest neighbor (in the Euclidean sense) of a given belief in a certain belief space
+
+ :param belief_space: the belief to search from
+ :param target_belief: the belief to find the nearest neighbor of
+ :return: the nearest neighbor belief from the belief space
+ """
+
+ # Compute Euclidean distances between the target belief and all points in the belief space
+ distances = np.linalg.norm(belief_space - target_belief, axis=1)
+
+ # Find the index of the minimum distance (break ties consistently by choosing the smallest index)
+ nearest_index = int(np.argmin(distances))
+
+ return np.array(belief_space[nearest_index])
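
For context (not part of the diff): the main additions in this module are the tensor-reduction helpers (reduce_T_attacker, reduce_R_attacker, reduce_Z_attacker, and the defender variants) and the aggregate-belief-MDP utilities. Below is a minimal usage sketch based on the code above; the reward constants, the observation-space size n=10, and the attacker_strategy object are illustrative assumptions (attacker_strategy would be a csle-common Policy exposing probability(state, action)):

from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

S = StoppingGameUtil.state_space()            # states [0, 1, 2]
A1 = StoppingGameUtil.defender_actions()      # defender actions [0, 1]
O = StoppingGameUtil.observation_space(n=10)  # observations 0..10
T = StoppingGameUtil.transition_tensor(L=1)   # note: no p parameter in 0.7.3
Z = StoppingGameUtil.observation_tensor(n=10)
R = StoppingGameUtil.reward_tensor(R_SLA=1, R_INT=-5, R_COST=-5, L=1, R_ST=5)

# Reduce the game tensors against a fixed attacker strategy (hypothetical Policy object),
# then build the aggregate belief MDP with aggregation resolution 10:
# T_d = StoppingGameUtil.reduce_T_attacker(T=T, strategy=attacker_strategy)
# R_d = StoppingGameUtil.reduce_R_attacker(R=R, strategy=attacker_strategy)
# Z_d = StoppingGameUtil.reduce_Z_attacker(Z=Z, strategy=attacker_strategy)
# belief_space, A, belief_T, belief_R = StoppingGameUtil.aggregate_belief_mdp_defender(
#     aggregation_resolution=10, T=T_d, R=R_d, Z=Z_d, S=S, A=A1, O=O)
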
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-csle-stopping-game
- Version: 0.7.2
+ Version: 0.7.3
  Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
  Author: Kim Hammar
  Author-email: hammar.kim@gmail.com
@@ -204,11 +204,11 @@ Classifier: Intended Audience :: Science/Research
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  Requires-Dist: gymnasium>=0.27.1
- Requires-Dist: csle-base>=0.7.2
- Requires-Dist: csle-common>=0.7.2
- Requires-Dist: csle-attacker>=0.7.2
- Requires-Dist: csle-defender>=0.7.2
- Requires-Dist: csle-collector>=0.7.2
+ Requires-Dist: csle-base>=0.7.3
+ Requires-Dist: csle-common>=0.7.3
+ Requires-Dist: csle-attacker>=0.7.3
+ Requires-Dist: csle-defender>=0.7.3
+ Requires-Dist: csle-collector>=0.7.3
  Provides-Extra: test
  Requires-Dist: pytest>=6.0; extra == "test"
  Requires-Dist: pytest-cov>=2.0; extra == "test"
@@ -1,9 +1,9 @@
  gymnasium>=0.27.1
- csle-base>=0.7.2
- csle-common>=0.7.2
- csle-attacker>=0.7.2
- csle-defender>=0.7.2
- csle-collector>=0.7.2
+ csle-base>=0.7.3
+ csle-common>=0.7.3
+ csle-attacker>=0.7.3
+ csle-defender>=0.7.3
+ csle-collector>=0.7.3

  [test]
  pytest>=6.0
@@ -24,7 +24,7 @@ class TestStoppingGameEnvSuite:
  :return: None
  """
  env_name = "test_env"
- T = StoppingGameUtil.transition_tensor(L=3, p=0)
+ T = StoppingGameUtil.transition_tensor(L=3)
  O = StoppingGameUtil.observation_space(n=100)
  Z = StoppingGameUtil.observation_tensor(n=100)
  R = np.zeros((2, 3, 3, 3))
@@ -70,7 +70,7 @@ class TestStoppingGameEnvSuite:

  :return: None
  """
- T = StoppingGameUtil.transition_tensor(L=3, p=0)
+ T = StoppingGameUtil.transition_tensor(L=3)
  O = StoppingGameUtil.observation_space(n=100)
  A1 = StoppingGameUtil.defender_actions()
  A2 = StoppingGameUtil.attacker_actions()
@@ -29,7 +29,7 @@ class TestStoppingGameMdpAttackerEnvSuite:
  :return: None
  """
  env_name = "test_env"
- T = StoppingGameUtil.transition_tensor(L=3, p=0)
+ T = StoppingGameUtil.transition_tensor(L=3)
  O = StoppingGameUtil.observation_space(n=100)
  Z = StoppingGameUtil.observation_tensor(n=100)
  R = np.zeros((2, 3, 3, 3))
@@ -29,7 +29,7 @@ class TestStoppingGamePomdpDefenderEnvSuite:
  :return: None
  """
  env_name = "test_env"
- T = StoppingGameUtil.transition_tensor(L=3, p=0)
+ T = StoppingGameUtil.transition_tensor(L=3)
  O = StoppingGameUtil.observation_space(n=100)
  Z = StoppingGameUtil.observation_tensor(n=100)
  R = np.zeros((2, 3, 3, 3))
@@ -73,7 +73,7 @@ class TestStoppingGameUtilSuite(object):
  :return: None
  """
  l = 6
- example_transition_tensor = StoppingGameUtil.transition_tensor(L=l, p=0.1)
+ example_transition_tensor = StoppingGameUtil.transition_tensor(L=l)
  for i in range(l):
  for j in range(2):
  for k in range(2):
@@ -105,7 +105,7 @@ class TestStoppingGameUtilSuite(object):
  :return: None
  """
  example_sample_next_state = StoppingGameUtil.sample_next_state(
- T=example_stopping_game_util.transition_tensor(L=3, p=0.1), l=3, s=2, a1=1, a2=1,
+ T=example_stopping_game_util.transition_tensor(L=3), l=3, s=2, a1=1, a2=1,
  S=example_stopping_game_util.state_space())
  assert example_sample_next_state in example_stopping_game_util.state_space()

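For context (not part of the diff): the test updates above track the signature change in this release, where StoppingGameUtil.transition_tensor no longer takes a p parameter. A minimal migration sketch, using L=3 as in the tests:

# gym-csle-stopping-game 0.7.2
T = StoppingGameUtil.transition_tensor(L=3, p=0)
# gym-csle-stopping-game 0.7.3
T = StoppingGameUtil.transition_tensor(L=3)
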
@@ -1 +0,0 @@
- __version__ = '0.7.2'
@@ -1,388 +0,0 @@
- from typing import Any
- import numpy as np
- import numpy.typing as npt
- from scipy.stats import betabinom
- from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
-
-
- class StoppingGameUtil:
- """
- Class with utility functions for the StoppingGame Environment
- """
-
- @staticmethod
- def b1() -> npt.NDArray[np.float64]:
- """
- Gets the initial belief
-
- :return: the initial belief
- """
- return np.array([1.0, 0.0, 0.0])
-
- @staticmethod
- def state_space():
- """
- Gets the state space
-
- :return: the state space of the game
- """
- return np.array([0, 1, 2])
-
- @staticmethod
- def defender_actions() -> npt.NDArray[np.int_]:
- """
- Gets the action space of the defender
-
- :return: the action space of the defender
- """
- return np.array([0, 1])
-
- @staticmethod
- def attacker_actions() -> npt.NDArray[np.int_]:
- """
- Gets the action space of the attacker
-
- :return: the action space of the attacker
- """
- return np.array([0, 1])
-
- @staticmethod
- def observation_space(n):
- """
- Returns the observation space of size n
-
- :param n: the maximum observation
- :return: the observation space
- """
- return np.array(list(range(n + 1)))
-
- @staticmethod
- def reward_tensor(R_SLA: int, R_INT: int, R_COST: int, L: int, R_ST: int) -> npt.NDArray[Any]:
- """
- Gets the reward tensor
-
- :param R_SLA: the R_SLA constant
- :param R_INT: the R_INT constant
- :param R_COST: the R_COST constant
- :param R_ST: the R_ST constant
- :return: a |L|x|A1|x|A2|x|S| tensor
- """
-
- R_l = []
- for l in range(1, L + 1):
- R = [
- # Defender continues
- [
- # Attacker continues
- [R_SLA, R_SLA + R_INT, 0],
- # Attacker stops
- [R_SLA, R_SLA, 0]
- ],
- # Defender stops
- [
- # Attacker continues
- [R_COST / l, R_ST / l, 0],
- # Attacker stops
- [R_COST / l, R_SLA, 0]
- ]
- ]
- R_l.append(R)
- return np.array(R_l)
-
- @staticmethod
- def transition_tensor(L: int, p: float) -> npt.NDArray[Any]:
- """
- Gets the transition tensor
-
- :param L: the maximum number of stop actions
- :return: a |L|x|A1|x|A2||S|^2 tensor
- """
- T_l = []
- for l in range(1, L + 1):
- if l == 1:
- T = [
- # Defender continues
- [
- # Attacker continues
- [
- [1, 0, 0], # No intrusion
- [0, 1 - 1 / (2 * l), 1 / (2 * l)], # Intrusion
- [0, 0, 1] # Terminal
- ],
- # Attacker stops
- [
- [0, 1, 0], # No intrusion
- [0, 0, 1], # Intrusion
- [0, 0, 1] # Terminal
- ]
- ],
-
- # Defender stops
- [
- # Attacker continues
- [
- [0, 0, 1], # No intrusion
- [0, 0, 1], # Intrusion
- [0, 0, 1] # Terminal
- ],
- # Attacker stops
- [
- [0, 0, 1], # No Intrusion
- [0, 0, 1], # Intrusion
- [0, 0, 1] # Terminal
- ]
- ]
- ]
- else:
- T = [
- # Defender continues
- [
- # Attacker continues
- [
- [1, 0, 0], # No intrusion
- [0, 1 - 1 / (2 * l), 1 / (2 * l)], # Intrusion
- [0, 0, 1] # Terminal
- ],
- # Attacker stops
- [
- [0, 1, 0], # No intrusion
- [0, 0, 1], # Intrusion
- [0, 0, 1] # Terminal
- ]
- ],
-
- # Defender stops
- [
- # Attacker continues
- [
- [1, 0, 0], # No intrusion
- [0, 1 - 1 / (2 * l), 1 / (2 * l)], # Intrusion
- [0, 0, 1] # Terminal
- ],
- # Attacker stops
- [
- [0, 1, 0], # No Intrusion
- [0, 0, 1], # Intrusion
- [0, 0, 1] # Terminal
- ]
- ]
- ]
- T_l.append(T)
- return np.array(T_l)
-
- @staticmethod
- def observation_tensor(n):
- """
- :return: a |A1|x|A2|x|S|x|O| tensor
- """
- intrusion_dist = []
- no_intrusion_dist = []
- terminal_dist = np.zeros(n + 1)
- terminal_dist[-1] = 1
- intrusion_rv = betabinom(n=n, a=1, b=0.7)
- no_intrusion_rv = betabinom(n=n, a=0.7, b=3)
- for i in range(n + 1):
- intrusion_dist.append(intrusion_rv.pmf(i))
- no_intrusion_dist.append(no_intrusion_rv.pmf(i))
- Z = np.array(
- [
- [
- [
- no_intrusion_dist,
- intrusion_dist,
- terminal_dist
- ],
- [
- no_intrusion_dist,
- intrusion_dist,
- terminal_dist
- ],
- ],
- [
- [
- no_intrusion_dist,
- intrusion_dist,
- terminal_dist
- ],
- [
- no_intrusion_dist,
- intrusion_dist,
- terminal_dist
- ],
- ]
- ]
- )
- return Z
-
- @staticmethod
- def sample_next_state(T: npt.NDArray[Any], l: int, s: int, a1: int, a2: int, S: npt.NDArray[np.int_]) -> int:
- """
- Samples the next state
-
- :param T: the transition operator
- :param s: the currrent state
- :param a1: the defender action
- :param a2: the attacker action
- :param S: the state space
- :param l: the number of stops remaining
- :return: s'
- """
- state_probs = []
- for s_prime in S:
- state_probs.append(T[l - 1][a1][a2][s][s_prime])
- return int(np.random.choice(np.arange(0, len(S)), p=state_probs))
-
- @staticmethod
- def sample_initial_state(b1: npt.NDArray[np.float64]) -> int:
- """
- Samples the initial state
-
- :param b1: the initial belief
- :return: s1
- """
- return int(np.random.choice(np.arange(0, len(b1)), p=b1))
-
- @staticmethod
- def sample_next_observation(Z: npt.NDArray[Any], s_prime: int, O: npt.NDArray[np.int_]) -> int:
- """
- Samples the next observation
-
- :param Z: observation tensor which include the observation probables
- :param s_prime: the new state
- :param O: the observation space
- :return: o
- """
- observation_probs = []
- for o in O:
- if len(Z.shape) == 4:
- observation_probs.append(Z[0][0][s_prime][o])
- elif len(Z.shape) == 3:
- observation_probs.append(Z[0][s_prime][o])
- elif len(Z.shape) == 2:
- observation_probs.append(Z[s_prime][o])
- o = np.random.choice(np.arange(0, len(O)), p=observation_probs)
- return int(o)
-
- @staticmethod
- def bayes_filter(s_prime: int, o: int, a1: int, b: npt.NDArray[np.float64], pi2: npt.NDArray[Any], l: int,
- config: StoppingGameConfig) -> float:
- """
- A Bayesian filter to compute the belief of player 1
- of being in s_prime when observing o after taking action a in belief b given that the opponent follows
- strategy pi2
-
- :param s_prime: the state to compute the belief of
- :param o: the observation
- :param a1: the action of player 1
- :param b: the current belief point
- :param pi2: the policy of player 2
- :param l: stops remaining
- :return: b_prime(s_prime)
- """
- l = l - 1
- norm = 0
- for s in config.S:
- for a2 in config.A2:
- for s_prime_1 in config.S:
- prob_1 = config.Z[a1][a2][s_prime_1][o]
- norm += b[s] * prob_1 * config.T[l][a1][a2][s][s_prime_1] * pi2[s][a2]
- if norm == 0:
- return 0
- temp = 0
-
- for s in config.S:
- for a2 in config.A2:
- temp += config.Z[a1][a2][s_prime][o] * config.T[l][a1][a2][s][s_prime] * b[s] * pi2[s][a2]
- b_prime_s_prime = temp / norm
- if round(b_prime_s_prime, 2) > 1:
- print(f"b_prime_s_prime >= 1: {b_prime_s_prime}, a1:{a1}, s_prime:{s_prime}, l:{l}, o:{o}, pi2:{pi2}")
- assert round(b_prime_s_prime, 2) <= 1
- if s_prime == 2 and o != config.O[-1]:
- assert round(b_prime_s_prime, 2) <= 0.01
- return float(b_prime_s_prime)
-
- @staticmethod
- def next_belief(o: int, a1: int, b: npt.NDArray[np.float64], pi2: npt.NDArray[Any],
- config: StoppingGameConfig, l: int, a2: int = 0, s: int = 0) -> npt.NDArray[np.float64]:
- """
- Computes the next belief using a Bayesian filter
-
- :param o: the latest observation
- :param a1: the latest action of player 1
- :param b: the current belief
- :param pi2: the policy of player 2
- :param config: the game config
- :param l: stops remaining
- :param a2: the attacker action (for debugging, should be consistent with pi2)
- :param s: the true state (for debugging)
- :return: the new belief
- """
- b_prime = np.zeros(len(config.S))
- for s_prime in config.S:
- b_prime[s_prime] = StoppingGameUtil.bayes_filter(s_prime=s_prime, o=o, a1=a1, b=b,
- pi2=pi2, config=config, l=l)
- if round(sum(b_prime), 2) != 1:
- print(f"error, b_prime:{b_prime}, o:{o}, a1:{a1}, b:{b}, pi2:{pi2}, "
- f"a2: {a2}, s:{s}")
- assert round(sum(b_prime), 2) == 1
- return b_prime
-
- @staticmethod
- def sample_attacker_action(pi2: npt.NDArray[Any], s: int) -> int:
- """
- Samples the attacker action
-
- :param pi2: the attacker policy
- :param s: the game state
- :return: a2 is the attacker action
- """
- return int(np.random.choice(np.arange(0, len(pi2[s])), p=pi2[s]))
-
- @staticmethod
- def pomdp_solver_file(config: StoppingGameConfig, discount_factor: float, pi2: npt.NDArray[Any]) -> str:
- """
- Gets the POMDP environment specification based on the format at http://www.pomdp.org/code/index.html,
- for the defender's local problem against a static attacker
-
- :param config: the POMDP config
- :param discount_factor: the discount factor
- :param pi2: the attacker strategy
- :return: the file content as a string
- """
- file_str = ""
- file_str = file_str + f"discount: {discount_factor}\n\n"
- file_str = file_str + "values: reward\n\n"
- file_str = file_str + f"states: {len(config.S)}\n\n"
- file_str = file_str + f"actions: {len(config.A1)}\n\n"
- file_str = file_str + f"observations: {len(config.O)}\n\n"
- initial_belief_str = " ".join(list(map(lambda x: str(x), config.b1)))
- file_str = file_str + f"start: {initial_belief_str}\n\n\n"
- num_transitions = 0
- for s in config.S:
- for a1 in config.A1:
- probs = []
- for s_prime in range(len(config.S)):
- num_transitions += 1
- prob = 0
- for a2 in config.A2:
- prob += config.T[0][a1][a2][s][s_prime] * pi2[s][a2]
- file_str = file_str + f"T: {a1} : {s} : {s_prime} {prob:.80f}\n"
- probs.append(prob)
- assert round(sum(probs), 3) == 1
- file_str = file_str + "\n\n"
- for a1 in config.A1:
- for s_prime in config.S:
- probs = []
- for o in range(len(config.O)):
- prob = config.Z[0][0][s_prime][o]
- file_str = file_str + f"O : {a1} : {s_prime} : {o} {prob:.80f}\n"
- probs.append(prob)
- assert round(sum(probs), 3) == 1
- file_str = file_str + "\n\n"
- for s in config.S:
- for a1 in config.A1:
- for s_prime in config.S:
- for o in config.O:
- r = config.R[0][a1][0][s]
- file_str = file_str + f"R: {a1} : {s} : {s_prime} : {o} {r:.80f}\n"
- return file_str