gym-csle-stopping-game 0.7.2-py3-none-any.whl → 0.7.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gym-csle-stopping-game might be problematic.
- gym_csle_stopping_game/__version__.py +1 -1
- gym_csle_stopping_game/constants/constants.py +1 -0
- gym_csle_stopping_game/util/stopping_game_util.py +336 -26
- {gym_csle_stopping_game-0.7.2.dist-info → gym_csle_stopping_game-0.7.4.dist-info}/METADATA +6 -6
- {gym_csle_stopping_game-0.7.2.dist-info → gym_csle_stopping_game-0.7.4.dist-info}/RECORD +7 -7
- {gym_csle_stopping_game-0.7.2.dist-info → gym_csle_stopping_game-0.7.4.dist-info}/WHEEL +0 -0
- {gym_csle_stopping_game-0.7.2.dist-info → gym_csle_stopping_game-0.7.4.dist-info}/top_level.txt +0 -0
gym_csle_stopping_game/__version__.py

@@ -1 +1 @@
-__version__ = '0.7.2'
+__version__ = '0.7.4'

gym_csle_stopping_game/constants/constants.py

@@ -34,6 +34,7 @@ class ENV_METRICS:
     DEFENDER_ACTION = "a1"
     ATTACKER_ACTION = "a2"
     OBSERVATION = "o"
+    BELIEF = "b"
     TIME_STEP = "t"
     AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN = "average_defender_baseline_stop_on_first_alert_return"
     AVERAGE_UPPER_BOUND_RETURN = "average_upper_bound_return"
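
The new ENV_METRICS.BELIEF key follows the same pattern as the existing metric constants: a short string identifier ("b") meant to be used as a dictionary key for per-step metrics. A minimal sketch, assuming the key is used like the other ENV_METRICS entries (whether the environment itself populates it is not shown in this diff):

import gym_csle_stopping_game.constants.constants as constants

# hypothetical per-step metrics dictionary keyed by the ENV_METRICS constants
info = {constants.ENV_METRICS.BELIEF: 0.42, constants.ENV_METRICS.TIME_STEP: 7}
print(info[constants.ENV_METRICS.BELIEF])  # 0.42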

gym_csle_stopping_game/util/stopping_game_util.py

@@ -1,8 +1,10 @@
-from typing import Any
+from typing import Any, Tuple
+import itertools
 import numpy as np
 import numpy.typing as npt
 from scipy.stats import betabinom
 from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig
+from csle_common.dao.training.policy import Policy


 class StoppingGameUtil:

@@ -90,7 +92,7 @@ class StoppingGameUtil:
         return np.array(R_l)

     @staticmethod
-    def transition_tensor(L: int
+    def transition_tensor(L: int) -> npt.NDArray[Any]:
         """
         Gets the transition tensor


@@ -105,15 +107,15 @@
             [
                 # Attacker continues
                 [
-                    [1, 0, 0],  # No intrusion
-                    [0, 1, 0],  # Intrusion
-                    [0, 0, 1]  # Terminal
+                    [1.0, 0.0, 0.0],  # No intrusion
+                    [0.0, 1.0, 0.0],  # Intrusion
+                    [0.0, 0.0, 1.0]  # Terminal
                 ],
                 # Attacker stops
                 [
-                    [0, 1, 0],  # No intrusion
-                    [0, 0, 1],  # Intrusion
-                    [0, 0, 1]  # Terminal
+                    [0.0, 1.0, 0.0],  # No intrusion
+                    [0.0, 0.0, 1.0],  # Intrusion
+                    [0.0, 0.0, 1.0]  # Terminal
                 ]
             ],


@@ -121,15 +123,15 @@
             [
                 # Attacker continues
                 [
-                    [0, 0, 1],  # No intrusion
-                    [0, 0, 1],  # Intrusion
-                    [0, 0, 1]  # Terminal
+                    [0.0, 0.0, 1.0],  # No intrusion
+                    [0.0, 0.0, 1.0],  # Intrusion
+                    [0.0, 0.0, 1.0]  # Terminal
                 ],
                 # Attacker stops
                 [
-                    [0, 0, 1],  # No Intrusion
-                    [0, 0, 1],  # Intrusion
-                    [0, 0, 1]  # Terminal
+                    [0.0, 0.0, 1.0],  # No Intrusion
+                    [0.0, 0.0, 1.0],  # Intrusion
+                    [0.0, 0.0, 1.0]  # Terminal
                 ]
             ]
         ]

@@ -139,15 +141,15 @@
             [
                 # Attacker continues
                 [
-                    [1, 0, 0],  # No intrusion
-                    [0, 1 - 1 / (2 * l), 1 / (2 * l)],  # Intrusion
-                    [0, 0, 1]  # Terminal
+                    [1.0, 0.0, 0.0],  # No intrusion
+                    [0.0, 1.0 - 1.0 / (2.0 * l), 1.0 / (2.0 * l)],  # Intrusion
+                    [0.0, 0.0, 1.0]  # Terminal
                 ],
                 # Attacker stops
                 [
-                    [0, 1, 0],  # No intrusion
-                    [0, 0, 1],  # Intrusion
-                    [0, 0, 1]  # Terminal
+                    [0.0, 1.0, 0.0],  # No intrusion
+                    [0.0, 0.0, 1.0],  # Intrusion
+                    [0.0, 0.0, 1.0]  # Terminal
                 ]
             ],


@@ -155,15 +157,15 @@
             [
                 # Attacker continues
                 [
-                    [1, 0, 0],  # No intrusion
-                    [0, 1 - 1 / (2 * l), 1 / (2 * l)],  # Intrusion
-                    [0, 0, 1]  # Terminal
+                    [1.0, 0.0, 0.0],  # No intrusion
+                    [0.0, 1.0 - 1.0 / (2.0 * l), 1.0 / (2.0 * l)],  # Intrusion
+                    [0.0, 0.0, 1.0]  # Terminal
                 ],
                 # Attacker stops
                 [
-                    [0, 1, 0],  # No Intrusion
-                    [0, 0, 1],  # Intrusion
-                    [0, 0, 1]  # Terminal
+                    [0.0, 1.0, 0.0],  # No Intrusion
+                    [0.0, 0.0, 1.0],  # Intrusion
+                    [0.0, 0.0, 1.0]  # Terminal
                 ]
             ]
         ]
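
The four hunks above only change the transition probabilities from Python ints to float literals; the matrices themselves are unchanged, and each row still defines a probability distribution over the next state. A quick sketch for checking this invariant, assuming the tensor returned by transition_tensor is indexed as T[l][a1][a2][s][s_prime] (the indexing convention is inferred, not shown in this diff):

import numpy as np
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

T = StoppingGameUtil.transition_tensor(L=3)
print(np.asarray(T).shape)
# every conditional distribution over next states should sum to one
assert np.allclose(np.asarray(T).sum(axis=-1), 1.0)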

@@ -386,3 +388,311 @@ class StoppingGameUtil:
                 r = config.R[0][a1][0][s]
                 file_str = file_str + f"R: {a1} : {s} : {s_prime} : {o} {r:.80f}\n"
         return file_str
+
+    @staticmethod
+    def reduce_T_attacker(T: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+        """
+        Reduces the transition tensor based on a given attacker strategy
+
+        :param T: the tensor to reduce
+        :param strategy: the strategy to use for the reduction
+        :return: the reduced tensor (|A1|x|S|x|S|)
+        """
+        if len(T.shape) == 5:
+            T = T[0]
+        reduced_T = np.zeros((T.shape[0], T.shape[2], T.shape[3]))
+        for i in range(T.shape[0]):
+            for j in range(T.shape[2]):
+                for k in range(T.shape[3]):
+                    reduced_T[i][j][k] = T[i][0][j][k] * strategy.probability(j, 0) + T[i][1][j][
+                        k] * strategy.probability(j, 1)
+                    # if j == 0:
+                    #     reduced_T[i][j][k] = T[i][0][j][k] * strategy.probability(j, 0) + T[i][1][j][
+                    #         k] * strategy.probability(j, 1)
+                    # else:
+                    #     reduced_T[i][j][k] = (T[i][0][j][k] * (1 - strategy.probability(j, 0)) + T[i][1][j][k] *
+                    #                           strategy.probability(j, 1))
+        return reduced_T
+
+    @staticmethod
+    def reduce_R_attacker(R: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+        """
+        Reduces the reward tensor based on a given attacker strategy
+
+        :param R: the reward tensor to reduce
+        :param strategy: the strategy to use for the reduction
+        :return: the reduced reward tensor (|A1|x|S|)
+        """
+        if len(R.shape) == 4:
+            R = R[0]
+        reduced_R = np.zeros((R.shape[0], R.shape[2]))
+        for i in range(R.shape[0]):
+            for j in range(R.shape[2]):
+                reduced_R[i][j] = (R[i][0][j] * strategy.probability(j, 0) + R[i][1][j] *
+                                   strategy.probability(j, 1))
+        return reduced_R
+
+    @staticmethod
+    def reduce_Z_attacker(Z: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+        """
+        Reduces the observation tensor based on a given attacker strategy
+
+        :param Z: the observation tensor to reduce
+        :param strategy: the strategy to use for the reduction
+        :return: the reduced observation tensor (|A1|x|S|x|O|)
+        """
+        reduced_Z = np.zeros((Z.shape[0], Z.shape[2], Z.shape[3]))
+        for i in range(Z.shape[0]):
+            for j in range(Z.shape[2]):
+                for k in range(Z.shape[3]):
+                    reduced_Z[i][j][k] = Z[i][0][j][k] * strategy.probability(j, 0) + Z[i][1][j][
+                        k] * strategy.probability(j, 1)
+        return reduced_Z
+
+    @staticmethod
+    def reduce_T_defender(T: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+        """
+        Reduces the transition tensor based on a given defender strategy
+
+        :param T: the tensor to reduce
+        :param strategy: the strategy to use for the reduction
+        :return: the reduced tensor (|A2|x|S|x|S|)
+        """
+        if len(T.shape) == 5:
+            T = T[0]
+        reduced_T = np.zeros((T.shape[1], T.shape[2], T.shape[3]))
+        for i in range(T.shape[1]):
+            for j in range(T.shape[2]):
+                for k in range(T.shape[3]):
+                    reduced_T[i][j][k] = (T[0][i][j][k] * strategy.probability(j, 0) + T[1][i][j][k]
+                                          * strategy.probability(j, 1))
+        return reduced_T
+
+    @staticmethod
+    def reduce_R_defender(R: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+        """
+        Reduces the reward tensor based on a given defender strategy
+
+        :param R: the reward tensor to reduce
+        :param strategy: the strategy to use for the reduction
+        :return: the reduced reward tensor (|A2|x|S|)
+        """
+        if len(R.shape) == 4:
+            R = R[0]
+        reduced_R = np.zeros((R.shape[1], R.shape[2]))
+        for i in range(R.shape[1]):
+            for j in range(R.shape[2]):
+                reduced_R[i][j] = (R[0][i][j] * strategy.probability(j, 0) + R[1][i][j] *
+                                   strategy.probability(j, 1))
+        return reduced_R
+
+    @staticmethod
+    def aggregate_belief_mdp_defender(aggregation_resolution: int, T: npt.NDArray[np.float_],
+                                      R: npt.NDArray[np.float_], Z: npt.NDArray[np.float_],
+                                      S: npt.NDArray[np.int_], A: npt.NDArray[np.int_], O: npt.NDArray[np.int_]) \
+            -> Tuple[npt.NDArray[np.float_], npt.NDArray[np.int_], npt.NDArray[np.float_], npt.NDArray[np.float_]]:
+        """
+        Generates an aggregate belief MDP from a given POMDP specification and aggregation resolution
+
+        :param aggregation_resolution: the belief aggregation resolution
+        :param T: the transition tensor of the POMDP
+        :param R: the reward tensor of the POMDP
+        :param Z: the observation tensor of the POMDP
+        :param S: the state space of the POMDP
+        :param A: the action space of the POMDP
+        :param O: the observation space of the POMDP
+        :return: the state space, action space, transition operator, and belief operator of the belief MDP
+        """
+        aggregate_belief_space = StoppingGameUtil.generate_aggregate_belief_space(
+            n=aggregation_resolution, belief_space_dimension=len(S))
+        belief_T = StoppingGameUtil.generate_aggregate_belief_transition_operator(
+            aggregate_belief_space=aggregate_belief_space, S=S, A=A, O=O, T=T, Z=Z)
+        belief_R = StoppingGameUtil.generate_aggregate_belief_reward_tensor(
+            aggregate_belief_space=aggregate_belief_space, S=S, A=A, R=R)
+        return aggregate_belief_space, A, belief_T, belief_R
+
+    @staticmethod
+    def generate_aggregate_belief_space(n: int, belief_space_dimension: int) -> npt.NDArray[np.float_]:
+        """
+        Generate an aggregate belief space B_n.
+
+        :param n: the aggregation resolution
+        :param belief_space_dimension: the belief space dimension
+        :return: the aggregate belief space
+        """
+
+        # Generate all combinations of integer allocations k_i such that sum(k_i) = n
+        combinations = [k for k in itertools.product(range(n + 1), repeat=belief_space_dimension) if sum(k) == n]
+
+        # Convert integer allocations to belief points by dividing each k_i by n
+        belief_points = [list(k_i / n for k_i in k) for k in combinations]
+
+        # Remove all beliefs that violate the stopping dynamics
+        belief_points = list(filter(lambda x: x[-1] == 1.0 or x[-1] == 0.0, belief_points))
+
+        return np.array(belief_points)
+
+    @staticmethod
+    def generate_aggregate_belief_reward_tensor(
+            aggregate_belief_space: npt.NDArray[np.float_], S: npt.NDArray[np.int_], A: npt.NDArray[np.int_],
+            R: npt.NDArray[np.float_]) -> npt.NDArray[np.float_]:
+        """
+        Generates an aggregate reward tensor for the aggregate belief MDP
+
+        :param aggregate_belief_space: the aggregate belief space
+        :param S: the state space of the POMDP
+        :param A: the action space of the POMDP
+        :param R: the reward tensor of the POMDP
+        :return: the reward tensor of the aggregate belief MDP
+        """
+        belief_R = np.zeros((len(A), len(aggregate_belief_space)))
+        belief_space_list = aggregate_belief_space.tolist()
+        for a in A:
+            for b in aggregate_belief_space:
+                expected_reward = 0
+                for s in S:
+                    expected_reward += R[a][s] * b[s]
+                belief_R[a][belief_space_list.index(b.tolist())] = expected_reward
+        return belief_R
+
+    @staticmethod
+    def generate_aggregate_belief_transition_operator(
+            aggregate_belief_space: npt.NDArray[np.float_], S: npt.NDArray[np.int_], A: npt.NDArray[np.int_],
+            O: npt.NDArray[np.int_], T: npt.NDArray[np.float_], Z: npt.NDArray[np.float_]) -> npt.NDArray[np.float_]:
+        """
+        Generates an aggregate belief space transition operator
+
+        :param aggregate_belief_space: the aggregate belief space
+        :param O: the observation space of the POMDP
+        :param S: the state space of the POMDP
+        :param A: the action space of the POMDP
+        :param T: the transition operator of the POMDP
+        :param Z: the observation tensor of the POMDP
+        :return: the aggregate belief space operator
+        """
+        belief_space_list = aggregate_belief_space.tolist()
+        belief_T = np.zeros((len(A), len(aggregate_belief_space), len(aggregate_belief_space)))
+        for a in A:
+            for b1 in aggregate_belief_space:
+                for b2 in aggregate_belief_space:
+                    belief_T[a][belief_space_list.index(b1.tolist())][belief_space_list.index(b2.tolist())] \
+                        = StoppingGameUtil.aggregate_belief_transition_probability(
+                            b1=b1, b2=b2, a=a, S=S, O=O, T=T, Z=Z, aggregate_belief_space=aggregate_belief_space, A=A)
+        return belief_T
+
+    @staticmethod
+    def aggregate_belief_transition_probability(b1: npt.NDArray[np.float_], b2: npt.NDArray[np.float_], a: int,
+                                                S: npt.NDArray[np.int_], O: npt.NDArray[np.int_],
+                                                A: npt.NDArray[np.int_],
+                                                T: npt.NDArray[np.float_], Z: npt.NDArray[np.float_],
+                                                aggregate_belief_space: npt.NDArray[np.float_]) -> float:
+        """
+        Calculates the probability of transitioning from belief b1 to belief b2 when taking action a
+
+        :param b1: the source belief
+        :param b2: the target belief
+        :param a: the action
+        :param S: the state space of the POMDP
+        :param O: the observation space of the POMDP
+        :param A: the action space of the POMDP
+        :param T: the transition operator
+        :param Z: the observation tensor
+        :param aggregate_belief_space: the aggregate belief space
+        :return: the probability P(b2 | b1, a)
+        """
+        prob = 0
+        for o in O:
+            if sum([Z[a][s_prime][o] * b1[s] * T[a][s][s_prime] for s in S for s_prime in S]) == 0:
+                continue
+            b_prime = StoppingGameUtil.pomdp_next_belief(
+                o=o, a=a, b=b1, states=S, observations=O, observation_tensor=Z, transition_tensor=T)
+            nearest_neighbor = StoppingGameUtil.find_nearest_neighbor_belief(belief_space=aggregate_belief_space,
+                                                                             target_belief=b_prime)
+            if np.array_equal(nearest_neighbor, b2):
+                for s in S:
+                    for s_prime in S:
+                        prob += Z[a][s_prime][o] * b1[s] * T[a][s][s_prime]
+        return prob
+
+    @staticmethod
+    def pomdp_next_belief(o: int, a: int, b: npt.NDArray[np.float64], states: npt.NDArray[np.int_],
+                          observations: npt.NDArray[np.int_], observation_tensor: npt.NDArray[np.float_],
+                          transition_tensor: npt.NDArray[np.float_]) \
+            -> npt.NDArray[np.float64]:
+        """
+        Computes the next belief of the POMDP using a Bayesian filter
+
+        :param o: the latest observation
+        :param a: the latest action of player 1
+        :param b: the current belief
+        :param states: the list of states
+        :param observations: the list of observations
+        :param observation_tensor: the observation tensor
+        :param transition_tensor: the transition tensor
+        :return: the new belief
+        """
+        b_prime = [0.0] * len(states)
+        for s_prime in states:
+            b_prime[s_prime] = StoppingGameUtil.pomdp_bayes_filter(
+                s_prime=s_prime, o=o, a=a, b=b, states=states, observations=observations,
+                transition_tensor=transition_tensor, observation_tensor=observation_tensor)
+        if round(sum(b_prime), 2) != 1:
+            print(f"error, b_prime:{b_prime}, o:{o}, a:{a}, b:{b}")
+        assert round(sum(b_prime), 2) == 1
+        return np.array(b_prime)
+
+    @staticmethod
+    def pomdp_bayes_filter(s_prime: int, o: int, a: int, b: npt.NDArray[np.float64], states: npt.NDArray[np.int_],
+                           observations: npt.NDArray[np.int_], observation_tensor: npt.NDArray[np.float_],
+                           transition_tensor: npt.NDArray[np.float_]) -> float:
+        """
+        A Bayesian filter to compute b[s_prime] of the POMDP
+
+        :param s_prime: the state to compute the belief for
+        :param o: the latest observation
+        :param a: the latest action
+        :param b: the current belief
+        :param states: the list of states
+        :param observations: the list of observations
+        :param observation_tensor: the observation tensor
+        :param transition_tensor: the transition tensor of the POMDP
+        :return: b[s_prime]
+        """
+        norm = 0.0
+        for s in states:
+            for s_prime_1 in states:
+                prob_1 = observation_tensor[a][s_prime_1][o]
+                norm += b[s] * prob_1 * transition_tensor[a][s][s_prime_1]
+        if norm == 0.0:
+            print(f"zero norm, a: {a}, b: {b}, o: {o}")
+            return 0.0
+        temp = 0.0
+
+        for s in states:
+            temp += observation_tensor[a][s_prime][o] * transition_tensor[a][s][s_prime] * b[s]
+        b_prime_s_prime = temp / norm
+        if round(b_prime_s_prime, 2) > 1:
+            print(f"b_prime_s_prime >= 1: {b_prime_s_prime}, a1:{a}, s_prime:{s_prime}")
+        assert round(b_prime_s_prime, 2) <= 1
+        if s_prime == 2 and o != observations[-1]:
+            assert round(b_prime_s_prime, 2) <= 0.01
+        return b_prime_s_prime
+
+    @staticmethod
+    def find_nearest_neighbor_belief(belief_space: npt.NDArray[np.float_], target_belief: npt.NDArray[np.float_]) \
+            -> npt.NDArray[np.float_]:
+        """
+        Finds the nearest neighbor (in the Euclidean sense) of a given belief in a certain belief space
+
+        :param belief_space: the belief to search from
+        :param target_belief: the belief to find the nearest neighbor of
+        :return: the nearest neighbor belief from the belief space
+        """
+
+        # Compute Euclidean distances between the target belief and all points in the belief space
+        distances = np.linalg.norm(belief_space - target_belief, axis=1)
+
+        # Find the index of the minimum distance (break ties consistently by choosing the smallest index)
+        nearest_index = int(np.argmin(distances))
+
+        return np.array(belief_space[nearest_index])
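
The methods added above provide the building blocks for turning the defender's POMDP into a finite, aggregate belief MDP: the belief simplex is discretized with generate_aggregate_belief_space, beliefs are propagated with the Bayesian filter (pomdp_next_belief / pomdp_bayes_filter) and snapped back onto the grid with find_nearest_neighbor_belief, and aggregate_belief_mdp_defender ties these together into a transition operator and reward tensor over the discretized beliefs. A minimal usage sketch on a hypothetical three-state POMDP (the toy tensors below are made up for illustration; only the method names and signatures come from the diff):

import numpy as np
from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil

S = np.array([0, 1, 2])   # no intrusion, intrusion, terminal
A = np.array([0, 1])      # continue, stop
O = np.array([0, 1])      # low alert, high alert

# hypothetical row-stochastic transition tensor T[a][s][s_prime]
T = np.array([
    [[0.9, 0.1, 0.0], [0.0, 0.8, 0.2], [0.0, 0.0, 1.0]],
    [[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0]]
])
# hypothetical observation tensor Z[a][s_prime][o]; the terminal state only
# emits the last observation, matching the sanity checks in pomdp_bayes_filter
Z = np.array([
    [[0.8, 0.2], [0.3, 0.7], [0.0, 1.0]],
    [[0.8, 0.2], [0.3, 0.7], [0.0, 1.0]]
])
# hypothetical reduced reward tensor R[a][s]
R = np.array([[0.0, -1.0, 0.0], [1.0, -2.0, 0.0]])

# discretize the belief simplex with resolution n=10 and build the belief MDP
B, A_mdp, belief_T, belief_R = StoppingGameUtil.aggregate_belief_mdp_defender(
    aggregation_resolution=10, T=T, R=R, Z=Z, S=S, A=A, O=O)
print(B.shape, belief_T.shape, belief_R.shape)

# one Bayesian filter step from the initial belief, snapped back onto the grid
b = np.array([1.0, 0.0, 0.0])
b_prime = StoppingGameUtil.pomdp_next_belief(o=1, a=0, b=b, states=S, observations=O,
                                             observation_tensor=Z, transition_tensor=T)
b_grid = StoppingGameUtil.find_nearest_neighbor_belief(belief_space=B, target_belief=b_prime)
print(b_prime, b_grid)

In the same spirit, reduce_T_attacker, reduce_R_attacker and reduce_Z_attacker (and their defender counterparts) marginalize the game tensors against a fixed opponent Policy, which is how such reduced single-agent tensors would typically be obtained before aggregation; constructing a concrete Policy object is omitted here.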

{gym_csle_stopping_game-0.7.2.dist-info → gym_csle_stopping_game-0.7.4.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-csle-stopping-game
-Version: 0.7.2
+Version: 0.7.4
 Summary: OpenAI gym reinforcement learning environment of a Dynkin (Optimal stopping) game in CSLE
 Author: Kim Hammar
 Author-email: hammar.kim@gmail.com

@@ -204,11 +204,11 @@ Classifier: Intended Audience :: Science/Research
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: gymnasium>=0.27.1
-Requires-Dist: csle-base>=0.7.
-Requires-Dist: csle-common>=0.7.
-Requires-Dist: csle-attacker>=0.7.
-Requires-Dist: csle-defender>=0.7.
-Requires-Dist: csle-collector>=0.7.
+Requires-Dist: csle-base>=0.7.4
+Requires-Dist: csle-common>=0.7.4
+Requires-Dist: csle-attacker>=0.7.4
+Requires-Dist: csle-defender>=0.7.4
+Requires-Dist: csle-collector>=0.7.4
 Provides-Extra: test
 Requires-Dist: pytest>=6.0; extra == "test"
 Requires-Dist: pytest-cov>=2.0; extra == "test"

{gym_csle_stopping_game-0.7.2.dist-info → gym_csle_stopping_game-0.7.4.dist-info}/RECORD

@@ -1,7 +1,7 @@
 gym_csle_stopping_game/__init__.py,sha256=ooy6TjxvBi1sZMEX3_mVlvfskqI5GqwITWzI882tfk0,657
-gym_csle_stopping_game/__version__.py,sha256=
+gym_csle_stopping_game/__version__.py,sha256=lBlBxbnftiADKPpC3XRA3jUPjRsVclCd3P95aQ33p_g,22
 gym_csle_stopping_game/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gym_csle_stopping_game/constants/constants.py,sha256=
+gym_csle_stopping_game/constants/constants.py,sha256=u8QPVgn51LHpV1BY6jqLN0N1eYPzA8bb87AsorPLApU,1047
 gym_csle_stopping_game/dao/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gym_csle_stopping_game/dao/stopping_game_attacker_mdp_config.py,sha256=kmtrVa2CCVbi5xfd6uPWqMvhGmP8ccrtn1e_VmVvH7k,3494
 gym_csle_stopping_game/dao/stopping_game_config.py,sha256=r77noSBWbP1AeBFiBVmiDeEn58SkBcHGklJStzAEEkE,6641

@@ -12,8 +12,8 @@ gym_csle_stopping_game/envs/stopping_game_env.py,sha256=J9h73alytskNgHxa7LHM83R1
 gym_csle_stopping_game/envs/stopping_game_mdp_attacker_env.py,sha256=UoSDY2U8x3AmOqiquwnAzolsya6bM2vVsAkW7i8ljWk,10878
 gym_csle_stopping_game/envs/stopping_game_pomdp_defender_env.py,sha256=cyC2OuJA41aqE84KA9-oJRWsKLIqzekh8A8zyf6Qo5I,8766
 gym_csle_stopping_game/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gym_csle_stopping_game/util/stopping_game_util.py,sha256=
-gym_csle_stopping_game-0.7.
-gym_csle_stopping_game-0.7.
-gym_csle_stopping_game-0.7.
-gym_csle_stopping_game-0.7.
+gym_csle_stopping_game/util/stopping_game_util.py,sha256=sLmXNHrv_HG1kebLHvY8iLRjuJp-2oks9Z6jsdE4gnU,28609
+gym_csle_stopping_game-0.7.4.dist-info/METADATA,sha256=cL-96STBmmQUzEWGoUW-zqXg05-Uuf198DPnEL6y68Q,27252
+gym_csle_stopping_game-0.7.4.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
+gym_csle_stopping_game-0.7.4.dist-info/top_level.txt,sha256=3DBHkAEI00nq0aXZlJUkXJrLiwkcJCfaFoYcaOzEZUU,23
+gym_csle_stopping_game-0.7.4.dist-info/RECORD,,

{gym_csle_stopping_game-0.7.2.dist-info → gym_csle_stopping_game-0.7.4.dist-info}/WHEEL
RENAMED (file without changes)

{gym_csle_stopping_game-0.7.2.dist-info → gym_csle_stopping_game-0.7.4.dist-info}/top_level.txt
RENAMED (file without changes)