gr-libs 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +277 -0
  2. evaluation/create_minigrid_map_image.py +34 -0
  3. evaluation/file_system.py +42 -0
  4. evaluation/generate_experiments_results.py +92 -0
  5. evaluation/generate_experiments_results_new_ver1.py +254 -0
  6. evaluation/generate_experiments_results_new_ver2.py +331 -0
  7. evaluation/generate_task_specific_statistics_plots.py +272 -0
  8. evaluation/get_plans_images.py +47 -0
  9. evaluation/increasing_and_decreasing_.py +63 -0
  10. gr_libs/__init__.py +2 -0
  11. gr_libs/environment/__init__.py +0 -0
  12. gr_libs/environment/environment.py +227 -0
  13. gr_libs/environment/utils/__init__.py +0 -0
  14. gr_libs/environment/utils/utils.py +17 -0
  15. gr_libs/metrics/__init__.py +0 -0
  16. gr_libs/metrics/metrics.py +224 -0
  17. gr_libs/ml/__init__.py +6 -0
  18. gr_libs/ml/agent.py +56 -0
  19. gr_libs/ml/base/__init__.py +1 -0
  20. gr_libs/ml/base/rl_agent.py +54 -0
  21. gr_libs/ml/consts.py +22 -0
  22. gr_libs/ml/neural/__init__.py +3 -0
  23. gr_libs/ml/neural/deep_rl_learner.py +395 -0
  24. gr_libs/ml/neural/utils/__init__.py +2 -0
  25. gr_libs/ml/neural/utils/dictlist.py +33 -0
  26. gr_libs/ml/neural/utils/penv.py +57 -0
  27. gr_libs/ml/planner/__init__.py +0 -0
  28. gr_libs/ml/planner/mcts/__init__.py +0 -0
  29. gr_libs/ml/planner/mcts/mcts_model.py +330 -0
  30. gr_libs/ml/planner/mcts/utils/__init__.py +2 -0
  31. gr_libs/ml/planner/mcts/utils/node.py +33 -0
  32. gr_libs/ml/planner/mcts/utils/tree.py +102 -0
  33. gr_libs/ml/sequential/__init__.py +1 -0
  34. gr_libs/ml/sequential/lstm_model.py +192 -0
  35. gr_libs/ml/tabular/__init__.py +3 -0
  36. gr_libs/ml/tabular/state.py +21 -0
  37. gr_libs/ml/tabular/tabular_q_learner.py +453 -0
  38. gr_libs/ml/tabular/tabular_rl_agent.py +126 -0
  39. gr_libs/ml/utils/__init__.py +6 -0
  40. gr_libs/ml/utils/env.py +7 -0
  41. gr_libs/ml/utils/format.py +100 -0
  42. gr_libs/ml/utils/math.py +13 -0
  43. gr_libs/ml/utils/other.py +24 -0
  44. gr_libs/ml/utils/storage.py +127 -0
  45. gr_libs/recognizer/__init__.py +0 -0
  46. gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
  47. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +102 -0
  48. gr_libs/recognizer/graml/__init__.py +0 -0
  49. gr_libs/recognizer/graml/gr_dataset.py +134 -0
  50. gr_libs/recognizer/graml/graml_recognizer.py +266 -0
  51. gr_libs/recognizer/recognizer.py +46 -0
  52. gr_libs/recognizer/utils/__init__.py +1 -0
  53. gr_libs/recognizer/utils/format.py +13 -0
  54. gr_libs-0.1.3.dist-info/METADATA +197 -0
  55. gr_libs-0.1.3.dist-info/RECORD +62 -0
  56. gr_libs-0.1.3.dist-info/WHEEL +5 -0
  57. gr_libs-0.1.3.dist-info/top_level.txt +3 -0
  58. tutorials/graml_minigrid_tutorial.py +30 -0
  59. tutorials/graml_panda_tutorial.py +32 -0
  60. tutorials/graml_parking_tutorial.py +38 -0
  61. tutorials/graml_point_maze_tutorial.py +43 -0
  62. tutorials/graql_minigrid_tutorial.py +29 -0
gr_libs/metrics/metrics.py ADDED
@@ -0,0 +1,224 @@
+ import math
+ import dill
+ import gymnasium
+ import numpy as np
+
+ from typing import Callable, Generator, List, Dict, Tuple, Any
+ from math import log2
+ from numpy.core.fromnumeric import mean
+ from scipy.stats import wasserstein_distance
+ from gymnasium.spaces.discrete import Discrete
+ # import torch
+ # from torch.distributions.categorical import Categorical
+
+ from ..ml.base import State
+ from ..ml.base.rl_agent import RLAgent
+ from ..ml.neural.deep_rl_learner import DeepRLAgent
+
+
+ def kl_divergence(p1: List[float], p2: List[float]) -> float:
+     """Computes the Kullback–Leibler divergence between two probability distributions p1 and p2,
+     following the formula at https://en.wikipedia.org/wiki/Kullback–Leibler_divergence
+
+     Args:
+         p1 (List[float]): A probability distribution
+         p2 (List[float]): Another probability distribution
+
+     Returns:
+         float: The KL-divergence between p1 and p2
+     """
+     assert len(p1) == len(p2)
+     return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))
+
+
+ def kl_divergence_norm_softmax(observations: List[Tuple[State, Any]], agent, actions: Discrete):
+     distances = []
+     p_traj = traj_to_policy(observations=observations, actions=actions)
+
+     for (observation, agent_pos), action in observations:
+         state = observation['image']
+         state_pickled = dill.dumps(state)
+
+         qp1 = p_traj[state_pickled]
+         qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
+             observation=(observation, agent_pos))
+         distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
+     return mean(distances)
+
+
+ def amplify(values, alpha=1.0):
+     """Amplifies the differences between values by raising them to the power alpha and renormalizing.
+     Args:
+         values (np.array): Input values to amplify
+         alpha (float): Amplification factor, where alpha > 1 increases the differences between probabilities
+     Returns:
+         np.array: amplified probabilities
+     """
+     values = values[:3]**alpha  # only the turn and move-forward actions are kept
+     return values / np.sum(values)
+
+ def stochastic_amplified_selection(actions_probs, alpha=8.0):
+     action_probs_amplified = amplify(actions_probs, alpha)
+     choice = np.random.choice(len(action_probs_amplified), p=action_probs_amplified)
+     if choice == 3:
+         choice = 6
+     return choice
+
+ def stochastic_selection(actions_probs):
+     return np.random.choice(len(actions_probs), p=actions_probs)
+
+ def greedy_selection(actions_probs):
+     return np.argmax(actions_probs)
+
+ def measure_average_sequence_distance(seq1, seq2):
+     """Measures the average distance between two sequences of observations and actions.
+
+     Args:
+         seq1: A tensor of tensors representing the first sequence.
+         seq2: A tensor of tensors representing the second sequence.
+
+     Returns:
+         A float representing the average per-step distance.
+     """
+
+     # Compare only up to the length of the shorter sequence
+     min_seq_len = np.min([len(seq1), len(seq2)])
+     assert np.max([len(seq1), len(seq2)]) <= 30*min_seq_len, "Sequence lengths differ by more than a factor of 30; a meaningful distance cannot be computed."
+
+     # Calculate the L1 distance between corresponding elements of the sequences
+     distances = []
+     for i in range(0, min_seq_len):
+         distances.append(np.sum(np.abs(np.array(seq1[i])-np.array(seq2[i]))))
+
+     # Calculate the average distance over all elements
+     return np.mean(np.array(distances))
+
+
+ def traj_to_policy(observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.) -> Dict[
+         str, List[float]]:
+     # converts a trajectory from a planner to a policy
+     # where the taken action has 99.99999% probability
+     trajectory_as_policy = {}
+     for (observation, agent_pos), action in observations:
+         # in the discrete world the action is the index
+         action_index = action
+
+         actions_len = actions.n
+         qs = [1e-6 + epsilon / actions_len for _ in range(actions_len)]
+         qs[action_index] = 1. - 1e-6 * (actions_len - 1) - epsilon
+
+         state = observation['image']
+         state_pickled = dill.dumps(state)
+         trajectory_as_policy[state_pickled] = qs
+     return trajectory_as_policy
+
+ def pass_observation_patcher(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
+     for observation in observations:
+         yield observation
+
+ def mean_wasserstein_distance(
+     observations: List[Tuple[State, Any]],
+     agent: DeepRLAgent,
+     actions: gymnasium.spaces.Box,
+     observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
+ ):
+     distances = []
+
+     for observation, observed_action in observation_patcher(observations, agent):
+         # query the agent for the mean and log std-dev of its action distribution at this observation
+         actor_means, log_std_dev = agent.get_mean_and_std_dev(observation=observation)
+
+         # split into per-dimension components, compute the Wasserstein distance for each and report the mean
+         observed_action = observed_action[0]
+         actor_means = actor_means[0]
+
+         if len(observed_action) != len(actor_means):
+             raise Exception(
+                 f"Lengths of observed action and actor mean should be equal! "
+                 f"{len(observed_action)},{len(actor_means)}"
+             )
+         wasserstein_distances = []
+         for observation_action, actor_mean in zip(observed_action, actor_means):
+             wasserstein_distances.append(
+                 wasserstein_distance([observation_action], [actor_mean])
+             )
+         distances.append(mean(wasserstein_distances))
+     return mean(distances)
+
+
+ def mean_action_distance_continuous(observations: List[Tuple[State, Any]], agent: DeepRLAgent, actions: gymnasium.spaces.Box):
+     distances = []
+     for observation, action in observations:
+         action2, _ = agent.model.predict(
+             observation,
+             state=None,
+             deterministic=True,
+             episode_start=np.ones((1,), dtype=bool)
+         )
+         action_arr, action2_arr = action[0], action2[0]
+         print(f"actor means:{action2}")
+         assert len(action_arr) == len(action2_arr), f"Actions should have the same length:{action},{action2}"
+
+         total_diff = 0
+         # total_diff = []
+         for action1, action2 in zip(action_arr, action2_arr):
+             total_diff += math.fabs(action1 - action2)
+         # distances.append(statistics.mean(total_diff))
+         distances.append(total_diff)
+         # print(f"distances:{distances}")
+     return mean(distances)
+
+
+ def set_agent_goal_observation(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
+     copy_observation = observations.copy()
+     for observation, action in copy_observation:
+         observation['desired_goal'] = agent.goal
+         yield observation, action
+
+
+ def z_score(x, mean_action: float, std_dev: float):
+     return (x - mean_action) / std_dev
+
+ def mean_p_value(
+     observations: List[Tuple[State, Any]],
+     agent: DeepRLAgent,
+     actions: gymnasium.spaces.Box,
+     observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
+ ):
+     distances = []
+     for observation, observed_action in observation_patcher(observations, agent):
+         # query the agent for the mean and log std-dev of its action distribution at this observation
+         actor_means, log_std_dev = agent.get_mean_and_std_dev(observation=observation)
+
+         # for each action dimension, calculate the absolute z-score and report the mean
+         actor_means = actor_means[0]
+         observed_actions = observed_action[0]
+         log_std_dev = log_std_dev[0]
+
+         if len(actor_means) != len(observed_actions) or len(actor_means) != len(log_std_dev) or len(observed_actions) != len(log_std_dev):
+             raise Exception(
+                 f"Lengths of observed action, actor mean and std-dev should be equal! "
+                 f"{len(observed_actions)},{len(actor_means)},{len(log_std_dev)}"
+             )
+         z_scores = []
+         for actor_mean, observation_action, action_log_std_dev in zip(actor_means, observed_actions, log_std_dev):
+             z_scores.append(
+                 math.fabs(z_score(observation_action, actor_mean, math.pow(2, math.fabs(action_log_std_dev))))
+             )
+         mean_distances = mean(z_scores)
+
+         distances.append(mean_distances)
+     return mean(distances)
+
+ def normalize(values: List[float]) -> List[float]:
+     values /= sum(values)
+     return values
+
+ def max(values: List[float]) -> List[float]:
+     if not len(values):
+         return values
+     vals = np.array(values)
+     argmax = vals.argmax()
+     vals[:] = 0.0
+     vals[argmax] = 1.0
+     return vals
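
For context, here is a minimal usage sketch of the metrics above; it is not part of the package. The toy distributions, the toy sequences and the StubAgent class are illustrative assumptions, with StubAgent standing in for a trained DeepRLAgent that exposes get_mean_and_std_dev.

import numpy as np
from gr_libs.metrics.metrics import (
    kl_divergence,
    stochastic_amplified_selection,
    measure_average_sequence_distance,
    mean_p_value,
)

# KL divergence between two toy action distributions.
p1 = [0.7, 0.1, 0.1, 0.1]
p2 = [0.25, 0.25, 0.25, 0.25]
print(kl_divergence(p1, p2))  # > 0; equals 0 only when p1 == p2

# Sample an action index from amplified probabilities (only the first three entries are used).
action = stochastic_amplified_selection(np.array([0.2, 0.3, 0.4, 0.1]), alpha=8.0)

# Average per-step L1 distance between two observation sequences.
seq_a = [[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]]
seq_b = [[0.0, 1.0], [1.0, 2.0], [2.0, 3.0]]
print(measure_average_sequence_distance(seq_a, seq_b))  # 1.0

# Hypothetical stand-in for a DeepRLAgent: returns batched action means and log std-devs.
class StubAgent:
    def get_mean_and_std_dev(self, observation):
        return np.array([[0.1, -0.2, 0.3]]), np.array([[0.0, 0.0, 0.0]])

# Each element pairs an observation with a batched observed action; the actions space is unused here.
observations = [(np.zeros(3), np.array([[0.1, -0.1, 0.2]]))]
print(mean_p_value(observations, StubAgent(), actions=None))  # mean absolute z-score
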
gr_libs/ml/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from ..ml.utils import device, seed, synthesize
+ # from ml.neural import PPOAlgo
+ from ..ml.tabular import TabularQLearner
+ # from ml.neural import ACModel, RecurrentACModel
+ from ..ml.neural import DictList
+ from ..ml.agent import Agent
gr_libs/ml/agent.py ADDED
@@ -0,0 +1,56 @@
+ import torch
+
+ from gr_libs.ml import utils
+ from gr_libs.ml.utils.other import device
+ # from ml.neural import ACModel
+
+
+ class Agent:
+     """An agent.
+
+     It is able:
+     - to choose an action given an observation,
+     - to analyze the feedback (i.e. reward and done state) of its action."""
+
+     def __init__(self, obs_space, action_space, model_dir,
+                  argmax=False, num_envs=1, use_memory=True, use_text=False):
+         obs_space, self.preprocess_obss = utils.get_obss_preprocessor(obs_space)
+         self.acmodel = ACModel(obs_space, action_space, use_memory=use_memory, use_text=use_text)
+         self.argmax = argmax
+         self.num_envs = num_envs
+
+         if self.acmodel.recurrent:
+             self.memories = torch.zeros(self.num_envs, self.acmodel.memory_size, device=device)
+
+         self.acmodel.load_state_dict(utils.get_model_state(model_dir))
+         self.acmodel.to(device)
+         self.acmodel.eval()
+         if hasattr(self.preprocess_obss, "vocab"):
+             self.preprocess_obss.vocab.load_vocab(utils.get_vocab(model_dir))
+
+     def get_actions(self, obss):
+         preprocessed_obss = self.preprocess_obss(obss, device=device)
+
+         with torch.no_grad():
+             if self.acmodel.recurrent:
+                 dist, _, self.memories = self.acmodel(preprocessed_obss, self.memories)
+             else:
+                 dist, _ = self.acmodel(preprocessed_obss)
+
+         if self.argmax:
+             actions = dist.probs.max(1, keepdim=True)[1]
+         else:
+             actions = dist.sample()
+
+         return actions.cpu().numpy()
+
+     def get_action(self, obs):
+         return self.get_actions([obs])[0]
+
+     def analyze_feedbacks(self, rewards, dones):
+         if self.acmodel.recurrent:
+             masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(1)
+             self.memories *= masks
+
+     def analyze_feedback(self, reward, done):
+         return self.analyze_feedbacks([reward], [done])
gr_libs/ml/base/__init__.py ADDED
@@ -0,0 +1 @@
+ from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
gr_libs/ml/base/rl_agent.py ADDED
@@ -0,0 +1,54 @@
+ from typing import Any
+ from abc import ABC, abstractmethod
+ import numpy as np
+
+ State = Any
+
+ class ContextualAgent:
+     def __init__(self, problem_name, problem_goal, agent):
+         self.problem_name = problem_name
+         self.problem_goal = problem_goal
+         self.agent = agent
+
+ class RLAgent(ABC):
+     def __init__(
+         self,
+         episodes: int,
+         decaying_eps: bool,
+         epsilon: float,
+         learning_rate: float,
+         gamma: float,
+         problem_name: str,
+         domain_name: str
+     ):
+         self.episodes = episodes
+         self.decaying_eps = decaying_eps
+         self.epsilon = epsilon
+         self.learning_rate = learning_rate
+         self.gamma = gamma
+         self.problem_name = problem_name
+         self.domain_name = domain_name
+         self.env = None
+         self.states_counter = {}
+
+     @abstractmethod
+     def learn(self):
+         pass
+
+     def class_name(self):
+         return self.__class__.__name__
+
+     def get_actions_probabilities(self, observation):
+         raise Exception("function get_actions_probabilities is unimplemented")
+
+     def get_number_of_unique_states(self):
+         return len(self.states_counter)
+
+     def update_states_counter(self, observation_str: str):
+         if observation_str in self.states_counter:
+             self.states_counter[observation_str] = self.states_counter[observation_str] + 1
+         else:
+             self.states_counter[observation_str] = 1
+         if len(self.states_counter) % 10000 == 0:
+             print(f"probably an error: too many unique states ({len(self.states_counter)})")
+
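
Below is a minimal sketch of how the abstract RLAgent base class above is meant to be extended; it is not part of the package. The RandomStubAgent class, its hyperparameter values and the problem/domain names are hypothetical, and only illustrate the constructor signature, the abstract learn() hook and the state-counter helpers.

from gr_libs.ml.base.rl_agent import RLAgent

class RandomStubAgent(RLAgent):
    """Hypothetical subclass demonstrating the RLAgent interface."""

    def __init__(self, problem_name: str, domain_name: str):
        super().__init__(
            episodes=100,
            decaying_eps=False,
            epsilon=0.1,
            learning_rate=0.001,
            gamma=0.99,
            problem_name=problem_name,
            domain_name=domain_name,
        )

    def learn(self):
        # A real agent would interact with self.env here and update its policy;
        # the base class only requires that this method be overridden.
        pass

agent = RandomStubAgent(problem_name="MiniGrid-Empty-8x8-v0", domain_name="minigrid")
agent.update_states_counter("serialized-state-0")
print(agent.get_number_of_unique_states())  # 1
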
gr_libs/ml/consts.py ADDED
@@ -0,0 +1,22 @@
+ MODEL = None
+ SEED = 1
+ LOG_INTERVAL = 1
+ SAVE_INTERVAL = 10
+ PROCS = 16
+ FRAMES = 10**7
+ EPISODES = FRAMES
+ EPOCS = 4
+ BATCH_SIZE = 256
+ FRAMES_PER_PROC = None
+ DISCOUNT = 0.99
+ GAMMA = DISCOUNT
+ LEARNING_RATE = 0.001
+ GEA_LAMBDA = 0.95
+ ENTROPY_COEF = 0.01
+ VALUE_LOSS_COEF = 0.5
+ MAX_GRAD_NORM = 0.5
+ OPTIM_EPS = 1e-8
+ OPTIM_ALPHA = 0.99
+ CLIP_EPS = 0.2
+ RECURRENCE = 1
+ TEXT = False
gr_libs/ml/neural/__init__.py ADDED
@@ -0,0 +1,3 @@
+ # from ml.neural.model import AbstractACModel, RecurrentACModel, ACModel
+ # from ml.neural.algorithms import BaseAlgo, A2CAlgo, PPOAlgo
+ from gr_libs.ml.neural.utils import DictList