gr-libs 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evaluation/analyze_results_cross_alg_cross_domain.py +277 -0
- evaluation/create_minigrid_map_image.py +34 -0
- evaluation/file_system.py +42 -0
- evaluation/generate_experiments_results.py +92 -0
- evaluation/generate_experiments_results_new_ver1.py +254 -0
- evaluation/generate_experiments_results_new_ver2.py +331 -0
- evaluation/generate_task_specific_statistics_plots.py +272 -0
- evaluation/get_plans_images.py +47 -0
- evaluation/increasing_and_decreasing_.py +63 -0
- gr_libs/__init__.py +2 -0
- gr_libs/environment/__init__.py +0 -0
- gr_libs/environment/environment.py +227 -0
- gr_libs/environment/utils/__init__.py +0 -0
- gr_libs/environment/utils/utils.py +17 -0
- gr_libs/metrics/__init__.py +0 -0
- gr_libs/metrics/metrics.py +224 -0
- gr_libs/ml/__init__.py +6 -0
- gr_libs/ml/agent.py +56 -0
- gr_libs/ml/base/__init__.py +1 -0
- gr_libs/ml/base/rl_agent.py +54 -0
- gr_libs/ml/consts.py +22 -0
- gr_libs/ml/neural/__init__.py +3 -0
- gr_libs/ml/neural/deep_rl_learner.py +395 -0
- gr_libs/ml/neural/utils/__init__.py +2 -0
- gr_libs/ml/neural/utils/dictlist.py +33 -0
- gr_libs/ml/neural/utils/penv.py +57 -0
- gr_libs/ml/planner/__init__.py +0 -0
- gr_libs/ml/planner/mcts/__init__.py +0 -0
- gr_libs/ml/planner/mcts/mcts_model.py +330 -0
- gr_libs/ml/planner/mcts/utils/__init__.py +2 -0
- gr_libs/ml/planner/mcts/utils/node.py +33 -0
- gr_libs/ml/planner/mcts/utils/tree.py +102 -0
- gr_libs/ml/sequential/__init__.py +1 -0
- gr_libs/ml/sequential/lstm_model.py +192 -0
- gr_libs/ml/tabular/__init__.py +3 -0
- gr_libs/ml/tabular/state.py +21 -0
- gr_libs/ml/tabular/tabular_q_learner.py +453 -0
- gr_libs/ml/tabular/tabular_rl_agent.py +126 -0
- gr_libs/ml/utils/__init__.py +6 -0
- gr_libs/ml/utils/env.py +7 -0
- gr_libs/ml/utils/format.py +100 -0
- gr_libs/ml/utils/math.py +13 -0
- gr_libs/ml/utils/other.py +24 -0
- gr_libs/ml/utils/storage.py +127 -0
- gr_libs/recognizer/__init__.py +0 -0
- gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +102 -0
- gr_libs/recognizer/graml/__init__.py +0 -0
- gr_libs/recognizer/graml/gr_dataset.py +134 -0
- gr_libs/recognizer/graml/graml_recognizer.py +266 -0
- gr_libs/recognizer/recognizer.py +46 -0
- gr_libs/recognizer/utils/__init__.py +1 -0
- gr_libs/recognizer/utils/format.py +13 -0
- gr_libs-0.1.3.dist-info/METADATA +197 -0
- gr_libs-0.1.3.dist-info/RECORD +62 -0
- gr_libs-0.1.3.dist-info/WHEEL +5 -0
- gr_libs-0.1.3.dist-info/top_level.txt +3 -0
- tutorials/graml_minigrid_tutorial.py +30 -0
- tutorials/graml_panda_tutorial.py +32 -0
- tutorials/graml_parking_tutorial.py +38 -0
- tutorials/graml_point_maze_tutorial.py +43 -0
- tutorials/graql_minigrid_tutorial.py +29 -0
gr_libs/metrics/metrics.py
ADDED
@@ -0,0 +1,224 @@
import math
import dill
import gymnasium
import numpy as np

from typing import Callable, Generator, List, Dict, Tuple, Any
from math import log2
from numpy.core.fromnumeric import mean
from scipy.stats import wasserstein_distance
from gymnasium.spaces.discrete import Discrete
# import torch
# from torch.distributions.categorical import Categorical

from ..ml.base import State
from ..ml.base.rl_agent import RLAgent
from ..ml.neural.deep_rl_learner import DeepRLAgent


def kl_divergence(p1: List[float], p2: List[float]) -> float:
    """Computes Kullback–Leibler divergence from two probabilities distributions p1 and p2.
    We follow the formula in Wikipedia https://en.wikipedia.org/wiki/Kullback–Leibler_divergence

    Args:
        p1 (List[float]): A probability distribution
        p2 (List[float]): Another probability distribution

    Returns:
        float: The KL-divergence between p1 and p2
    """
    assert (len(p1) == len(p2))
    return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))


def kl_divergence_norm_softmax(observations: List[Tuple[State, Any]], agent, actions: Discrete):
    distances = []
    p_traj = traj_to_policy(observations=observations, actions=actions)

    for (observation, agent_pos), action in observations:
        state = observation['image']
        state_pickled = dill.dumps(state)

        qp1 = p_traj[state_pickled]
        qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
            observation=(observation, agent_pos))
        distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
    return mean(distances)


def amplify(values, alpha=1.0):
    """Computes amplified softmax probabilities for an array of values
    Args:
        values (list): Input values for which to compute softmax
        alpha (float): Amplification factor, where alpha > 1 increases differences between probabilities
    Returns:
        np.array: amplified softmax probabilities
    """
    values = values[:3]**alpha  # currently only choose to turn or move forward
    return values / np.sum(values)

def stochastic_amplified_selection(actions_probs, alpha=8.0):
    action_probs_amplified = amplify(actions_probs, alpha)
    choice = np.random.choice(len(action_probs_amplified), p=action_probs_amplified)
    if choice == 3:
        choice = 6
    return choice

def stochastic_selection(actions_probs):
    return np.random.choice(len(actions_probs), p=actions_probs)

def greedy_selection(actions_probs):
    return np.argmax(actions_probs)

def measure_average_sequence_distance(seq1, seq2):
    """Measures the sequence similarity between two sequences of observations and actions.

    Args:
        seq1: A tensor of tensors representing the first sequence.
        seq2: A tensor of tensors representing the second sequence.

    Returns:
        A float representing the sequence similarity.
    """

    # Ensure both sequences have the same length
    min_seq_len = np.min([len(seq1), len(seq2)])
    assert np.max([len(seq1), len(seq2)]) <= 30*min_seq_len, "We can't really measure similarity in case the sequences are really not the same... maybe just return a default NOT_SIMILAR here."

    # Calculate the Euclidean distance between corresponding elements in the sequences
    distances = []
    for i in range(0, min_seq_len):
        distances.append(np.sum(np.abs(np.array(seq1[i])-np.array(seq2[i]))))

    # Calculate the average distance over all elements
    return np.mean(np.array(distances))


def traj_to_policy(observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.) -> Dict[
        str, List[float]]:
    # converts a trajectory from a planner to a policy
    # where the taken action has 99.99999% probability
    trajectory_as_policy = {}
    for (observation, agent_pos), action in observations:
        # in the discrete world the action is the index
        action_index = action

        actions_len = actions.n
        qs = [1e-6 + epsilon / actions_len for _ in range(actions_len)]
        qs[action_index] = 1. - 1e-6 * (actions_len - 1) - epsilon

        state = observation['image']
        state_pickled = dill.dumps(state)
        trajectory_as_policy[state_pickled] = qs
    return trajectory_as_policy

def pass_observation_patcher(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
    for observation in observations:
        yield observation

def mean_wasserstein_distance(
    observations: List[Tuple[State, Any]],
    agent: DeepRLAgent,
    actions: gymnasium.spaces.Box,
    observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
):
    distances = []

    for observation, observed_action in observation_patcher(observations, agent):
        # execute prediction X times and add to list (observed_action * X) |X| Len
        actor_means, log_std_dev = agent.get_mean_and_std_dev(observation=observation)

        # split to 3 axis and for each one calculate wasserstein distance and report mean
        observed_action = observed_action[0]
        actor_means = actor_means[0]

        if len(observed_action) != len(actor_means):
            raise Exception(
                f"Length of observed actions, actor mean should be equal! "
                f"{len(observed_action)},{len(actor_means)}"
            )
        wasserstein_distances = []
        for observation_action, actor_mean in zip(observed_action, actor_means):
            wasserstein_distances.append(
                wasserstein_distance([observation_action], [actor_mean])
            )
        distances.append(mean(wasserstein_distances))
    return mean(distances)


def mean_action_distance_continuous(observations: List[Tuple[State, Any]], agent: DeepRLAgent, actions: gymnasium.spaces.Box):
    distances = []
    for observation, action in observations:
        action2, _ = agent.model.predict(
            observation,
            state=None,
            deterministic=True,
            episode_start=np.ones((1,), dtype=bool)
        )
        action_arr, action2_arr = action[0], action2[0]
        print(f"actor means:{action2}")
        assert len(action_arr) == len(action2_arr), f"Actions should be on the same length:{action},{action2}"

        total_diff = 0
        # total_diff = []
        for action1, action2 in zip(action_arr, action2_arr):
            total_diff += math.fabs(action1 - action2)
        # distances.append(statistics.mean(total_diff))
        distances.append(total_diff)
    # print(f"distances:{distances}")
    return mean(distances)


def set_agent_goal_observation(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
    copy_observation = observations.copy()
    for observation, action in copy_observation:
        observation['desired_goal'] = agent.goal
        yield observation, action


def z_score(x, mean_action: float, std_dev: float):
    return (x - mean_action) / std_dev

def mean_p_value(
    observations: List[Tuple[State, Any]],
    agent: DeepRLAgent,
    actions: gymnasium.spaces.Box,
    observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
):
    distances = []
    for observation, observed_action in observation_patcher(observations, agent):
        # execute prediction X times and add to list (observed_action * X) |X| Len
        actor_means, log_std_dev = agent.get_mean_and_std_dev(observation=observation)

        # for each axis, calculate z-score distance and report mean
        actor_means = actor_means[0]
        observed_actions = observed_action[0]
        log_std_dev = log_std_dev[0]

        if len(actor_means) != len(observed_actions) or len(actor_means) != len(log_std_dev) or len(observed_actions) != len(log_std_dev):
            raise Exception(
                f"Length of observed actions, actor mean and std-dev should be equal! "
                f"{len(observed_actions)},{len(actor_means)},{len(log_std_dev)}"
            )
        z_scores = []
        for actor_mean, observation_action, action_log_std_dev in zip(actor_means, observed_actions, log_std_dev):
            z_scores.append(
                math.fabs(z_score(observation_action, actor_mean, math.pow(2, math.fabs(action_log_std_dev))))
            )
        mean_distances = mean(z_scores)

        distances.append(mean_distances)
    return mean(distances)

def normalize(values: List[float]) -> List[float]:
    values /= sum(values)
    return values

def max(values: List[float]) -> List[float]:
    if not len(values):
        return values
    vals = np.array(values)
    argmax = vals.argmax()
    vals[:] = 0.0
    vals[argmax] = 1.0
    return vals
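The helpers in gr_libs/metrics/metrics.py that do not need a trained agent can be exercised directly. A minimal sketch, assuming the wheel and its dependencies (numpy, scipy, dill, gymnasium) are installed; the probability vectors and sequences below are illustrative toy values, not data shipped with the package:

    import numpy as np
    from gr_libs.metrics.metrics import (
        kl_divergence,
        amplify,
        stochastic_amplified_selection,
        measure_average_sequence_distance,
    )

    # KL-divergence between two small discrete distributions (toy values).
    p1 = [0.7, 0.2, 0.1]
    p2 = [0.5, 0.3, 0.2]
    print(kl_divergence(p1, p2))  # positive, and 0.0 only when p1 == p2

    # amplify keeps the first three action probabilities, raises them to alpha and
    # renormalizes; stochastic_amplified_selection samples an index from that vector
    # (and remaps a sampled index 3 to action index 6).
    probs = np.array([0.2, 0.3, 0.4, 0.1])
    print(amplify(probs, alpha=2.0))
    print(stochastic_amplified_selection(probs, alpha=8.0))

    # Average per-step L1 distance over the overlapping prefix of two sequences.
    seq_a = [[0.0, 1.0], [1.0, 1.0], [2.0, 1.0]]
    seq_b = [[0.0, 1.0], [1.5, 1.0]]
    print(measure_average_sequence_distance(seq_a, seq_b))  # 0.25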
gr_libs/ml/__init__.py
ADDED
gr_libs/ml/agent.py
ADDED
@@ -0,0 +1,56 @@
import torch

from gr_libs.ml import utils
from gr_libs.ml.utils.other import device
# from ml.neural import ACModel


class Agent:
    """An agent.

    It is able:
    - to choose an action given an observation,
    - to analyze the feedback (i.e. reward and done state) of its action."""

    def __init__(self, obs_space, action_space, model_dir,
                 argmax=False, num_envs=1, use_memory=True, use_text=False):
        obs_space, self.preprocess_obss = utils.get_obss_preprocessor(obs_space)
        self.acmodel = ACModel(obs_space, action_space, use_memory=use_memory, use_text=use_text)
        self.argmax = argmax
        self.num_envs = num_envs

        if self.acmodel.recurrent:
            self.memories = torch.zeros(self.num_envs, self.acmodel.memory_size, device=device)

        self.acmodel.load_state_dict(utils.get_model_state(model_dir))
        self.acmodel.to(device)
        self.acmodel.eval()
        if hasattr(self.preprocess_obss, "vocab"):
            self.preprocess_obss.vocab.load_vocab(utils.get_vocab(model_dir))

    def get_actions(self, obss):
        preprocessed_obss = self.preprocess_obss(obss, device=device)

        with torch.no_grad():
            if self.acmodel.recurrent:
                dist, _, self.memories = self.acmodel(preprocessed_obss, self.memories)
            else:
                dist, _ = self.acmodel(preprocessed_obss)

        if self.argmax:
            actions = dist.probs.max(1, keepdim=True)[1]
        else:
            actions = dist.sample()

        return actions.cpu().numpy()

    def get_action(self, obs):
        return self.get_actions([obs])[0]

    def analyze_feedbacks(self, rewards, dones):
        if self.acmodel.recurrent:
            masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(1)
            self.memories *= masks

    def analyze_feedback(self, reward, done):
        return self.analyze_feedbacks([reward], [done])
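The least obvious part of Agent is the recurrent-memory bookkeeping: analyze_feedbacks builds a 0/1 mask from the done flags and multiplies it into the per-environment memories, so a finished episode restarts from a zeroed memory row. A minimal sketch of that masking step, using NumPy arrays in place of the torch tensors above (shapes and values are illustrative):

    import numpy as np

    num_envs, memory_size = 3, 4
    memories = np.ones((num_envs, memory_size))   # one memory row per parallel env
    dones = [False, True, False]                  # env 1 just finished its episode

    # Same arithmetic as Agent.analyze_feedbacks: 0 where done, 1 otherwise.
    masks = 1 - np.array(dones, dtype=np.float32).reshape(-1, 1)
    memories *= masks

    print(memories)  # row 1 is all zeros; rows 0 and 2 are unchanged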
gr_libs/ml/base/__init__.py
ADDED
@@ -0,0 +1 @@
from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
gr_libs/ml/base/rl_agent.py
ADDED
@@ -0,0 +1,54 @@
from typing import Any
from abc import ABC, abstractmethod
import numpy as np

State = Any

class ContextualAgent:
    def __init__(self, problem_name, problem_goal, agent):
        self.problem_name = problem_name
        self.problem_goal = problem_goal
        self.agent = agent

class RLAgent(ABC):
    def __init__(
        self,
        episodes: int,
        decaying_eps: bool,
        epsilon: float,
        learning_rate: float,
        gamma: float,
        problem_name: str,
        domain_name: str
    ):
        self.episodes = episodes
        self.decaying_eps = decaying_eps
        self.epsilon = epsilon
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.problem_name = problem_name
        self.domain_name = domain_name
        self.env = None
        self.states_counter = {}

    @abstractmethod
    def learn(self):
        pass

    def class_name(self):
        return self.__class__.__name__

    def get_actions_probabilities(self, observation):
        raise Exception("function get_actions_probabilities is unimplemented")

    def get_number_of_unique_states(self):
        return len(self.states_counter)

    def update_states_counter(self, observation_str: str):
        if observation_str in self.states_counter:
            self.states_counter[observation_str] = self.states_counter[observation_str] + 1
        else:
            self.states_counter[observation_str] = 1
        if len(self.states_counter) % 10000 == 0:
            print(f"probably error to many {len(self.states_counter)}")
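RLAgent only requires subclasses to implement learn(); the constructor stores hyperparameters, and the remaining helpers track visited states. A minimal sketch of a concrete subclass, assuming the wheel is installed; DummyAgent, its trivial learn() body, and the placeholder problem/domain names are illustrative, not part of the package:

    from gr_libs.ml.base.rl_agent import RLAgent

    class DummyAgent(RLAgent):
        def learn(self):
            # A real agent would interact with self.env here; this stub only
            # exercises the inherited state-counting helpers.
            for step in range(3):
                self.update_states_counter(observation_str=f"state_{step}")

    agent = DummyAgent(
        episodes=100,
        decaying_eps=True,
        epsilon=1.0,
        learning_rate=0.001,
        gamma=0.99,
        problem_name="toy-problem",   # placeholder; any string is stored as-is
        domain_name="toy-domain",
    )
    agent.learn()
    print(agent.class_name(), agent.get_number_of_unique_states())  # DummyAgent 3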
gr_libs/ml/consts.py
ADDED
@@ -0,0 +1,22 @@
MODEL = None
SEED = 1
LOG_INTERVAL = 1
SAVE_INTERVAL = 10
PROCS = 16
FRAMES = 10**7
EPISODES = FRAMES
EPOCS = 4
BATCH_SIZE = 256
FRAMES_PER_PROC = None
DISCOUNT = 0.99
GAMMA = DISCOUNT
LEARNING_RATE = 0.001
GEA_LAMBDA = 0.95
ENTROPY_COEF = 0.01
VALUE_LOSS_COEF = 0.5
MAX_GRAD_NORM = 0.5
OPTIM_EPS = 1e-8
OPTIM_ALPHA = 0.99
CLIP_EPS = 0.2
RECURRENCE = 1
TEXT = False