gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
  2. evaluation/create_minigrid_map_image.py +10 -6
  3. evaluation/file_system.py +16 -5
  4. evaluation/generate_experiments_results.py +123 -74
  5. evaluation/generate_experiments_results_new_ver1.py +227 -243
  6. evaluation/generate_experiments_results_new_ver2.py +317 -317
  7. evaluation/generate_task_specific_statistics_plots.py +481 -253
  8. evaluation/get_plans_images.py +41 -26
  9. evaluation/increasing_and_decreasing_.py +97 -56
  10. gr_libs/__init__.py +2 -1
  11. gr_libs/_version.py +2 -2
  12. gr_libs/environment/__init__.py +16 -8
  13. gr_libs/environment/environment.py +167 -39
  14. gr_libs/environment/utils/utils.py +22 -12
  15. gr_libs/metrics/__init__.py +5 -0
  16. gr_libs/metrics/metrics.py +76 -34
  17. gr_libs/ml/__init__.py +2 -0
  18. gr_libs/ml/agent.py +21 -6
  19. gr_libs/ml/base/__init__.py +1 -1
  20. gr_libs/ml/base/rl_agent.py +13 -10
  21. gr_libs/ml/consts.py +1 -1
  22. gr_libs/ml/neural/deep_rl_learner.py +433 -352
  23. gr_libs/ml/neural/utils/__init__.py +1 -1
  24. gr_libs/ml/neural/utils/dictlist.py +3 -3
  25. gr_libs/ml/neural/utils/penv.py +5 -2
  26. gr_libs/ml/planner/mcts/mcts_model.py +524 -302
  27. gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
  28. gr_libs/ml/planner/mcts/utils/node.py +11 -7
  29. gr_libs/ml/planner/mcts/utils/tree.py +14 -10
  30. gr_libs/ml/sequential/__init__.py +1 -1
  31. gr_libs/ml/sequential/lstm_model.py +256 -175
  32. gr_libs/ml/tabular/state.py +7 -7
  33. gr_libs/ml/tabular/tabular_q_learner.py +123 -73
  34. gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
  35. gr_libs/ml/utils/__init__.py +8 -2
  36. gr_libs/ml/utils/format.py +78 -70
  37. gr_libs/ml/utils/math.py +2 -1
  38. gr_libs/ml/utils/other.py +1 -1
  39. gr_libs/ml/utils/storage.py +88 -28
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
  42. gr_libs/recognizer/graml/gr_dataset.py +209 -110
  43. gr_libs/recognizer/graml/graml_recognizer.py +431 -240
  44. gr_libs/recognizer/recognizer.py +38 -27
  45. gr_libs/recognizer/utils/__init__.py +1 -1
  46. gr_libs/recognizer/utils/format.py +8 -3
  47. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
  48. gr_libs-0.1.8.dist-info/RECORD +70 -0
  49. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
  50. tests/test_gcdraco.py +10 -0
  51. tests/test_graml.py +8 -4
  52. tests/test_graql.py +2 -1
  53. tutorials/gcdraco_panda_tutorial.py +66 -0
  54. tutorials/gcdraco_parking_tutorial.py +61 -0
  55. tutorials/graml_minigrid_tutorial.py +42 -12
  56. tutorials/graml_panda_tutorial.py +35 -14
  57. tutorials/graml_parking_tutorial.py +37 -20
  58. tutorials/graml_point_maze_tutorial.py +33 -13
  59. tutorials/graql_minigrid_tutorial.py +31 -15
  60. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  61. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/environment/utils/utils.py CHANGED
@@ -1,17 +1,27 @@
  import logging
  import sys
- from gr_libs.environment.environment import MINIGRID, PANDA, PARKING, POINT_MAZE, EnvProperty, MinigridProperty, PandaProperty, ParkingProperty, PointMazeProperty
+ from gr_libs.environment.environment import (
+     MINIGRID,
+     PANDA,
+     PARKING,
+     POINT_MAZE,
+     EnvProperty,
+     MinigridProperty,
+     PandaProperty,
+     ParkingProperty,
+     PointMazeProperty,
+ )


  def domain_to_env_property(domain_name: str):
-     if domain_name == MINIGRID:
-         return MinigridProperty
-     elif domain_name == PARKING:
-         return ParkingProperty
-     elif domain_name == PANDA:
-         return PandaProperty
-     elif domain_name == POINT_MAZE:
-         return PointMazeProperty
-     else:
-         logging.error(f"Domain {domain_name} is not supported.")
-         sys.exit(1)
+     if domain_name == MINIGRID:
+         return MinigridProperty
+     elif domain_name == PARKING:
+         return ParkingProperty
+     elif domain_name == PANDA:
+         return PandaProperty
+     elif domain_name == POINT_MAZE:
+         return PointMazeProperty
+     else:
+         logging.error(f"Domain {domain_name} is not supported.")
+         sys.exit(1)
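
A quick usage sketch of the reformatted helper (assuming, per the file list and the +22/-12 counts, that this hunk belongs to gr_libs/environment/utils/utils.py; the import path below reflects that assumption):

    from gr_libs.environment.environment import MINIGRID, MinigridProperty
    from gr_libs.environment.utils.utils import domain_to_env_property

    # Resolve the property class for a supported domain constant.
    property_cls = domain_to_env_property(MINIGRID)
    assert property_cls is MinigridProperty
    # Unsupported domain names are logged as errors and trigger sys.exit(1).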
gr_libs/metrics/__init__.py CHANGED
@@ -0,0 +1,5 @@
+ from .metrics import (
+     mean_p_value,
+     mean_wasserstein_distance,
+     stochastic_amplified_selection,
+ )
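
With the new package-level __init__, the helpers can be imported from gr_libs.metrics directly; a minimal sketch of the re-export relationship:

    from gr_libs.metrics import (
        mean_p_value,
        mean_wasserstein_distance,
        stochastic_amplified_selection,
    )
    from gr_libs.metrics.metrics import stochastic_amplified_selection as _impl

    # The package-level name is the same object defined in gr_libs.metrics.metrics.
    assert _impl is stochastic_amplified_selection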
gr_libs/metrics/metrics.py CHANGED
@@ -7,6 +7,7 @@ from typing import Callable, Generator, List, Dict, Tuple, Any
  from math import log2
  from scipy.stats import wasserstein_distance
  from gymnasium.spaces.discrete import Discrete
+
  # import torch
  # from torch.distributions.categorical import Categorical

@@ -26,21 +27,24 @@ def kl_divergence(p1: List[float], p2: List[float]) -> float:
      Returns:
          float: The KL-divergence between p1 and p2
      """
-     assert (len(p1) == len(p2))
+     assert len(p1) == len(p2)
      return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))


- def kl_divergence_norm_softmax(observations: List[Tuple[State, Any]], agent, actions: Discrete):
+ def kl_divergence_norm_softmax(
+     observations: List[Tuple[State, Any]], agent, actions: Discrete
+ ):
      distances = []
      p_traj = traj_to_policy(observations=observations, actions=actions)

      for (observation, agent_pos), action in observations:
-         state = observation['image']
+         state = observation["image"]
          state_pickled = dill.dumps(state)

          qp1 = p_traj[state_pickled]
          qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
-             observation=(observation, agent_pos))
+             observation=(observation, agent_pos)
+         )
          distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
      return np.mean(distances)

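
kl_divergence itself is a pure function of two equal-length distributions, so it can be checked in isolation; the probabilities below are illustrative:

    from math import log2

    def kl_divergence(p1, p2):
        # same logic as the function in the hunk above
        assert len(p1) == len(p2)
        return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))

    print(kl_divergence([0.5, 0.5], [0.9, 0.1]))  # ~0.74 bits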
@@ -53,9 +57,10 @@ def amplify(values, alpha=1.0):
      Returns:
          np.array: amplified softmax probabilities
      """
-     values = values[:3]**alpha  # currently only choose to turn or move forward
+     values = values[:3] ** alpha  # currently only choose to turn or move forward
      return values / np.sum(values)

+
  def stochastic_amplified_selection(actions_probs, alpha=8.0):
      action_probs_amplified = amplify(actions_probs, alpha)
      choice = np.random.choice(len(action_probs_amplified), p=action_probs_amplified)
@@ -63,12 +68,15 @@ def stochastic_amplified_selection(actions_probs, alpha=8.0):
          choice = 6
      return choice

+
  def stochastic_selection(actions_probs):
      return np.random.choice(len(actions_probs), p=actions_probs)

+
  def greedy_selection(actions_probs):
      return np.argmax(actions_probs)

+
  def measure_average_sequence_distance(seq1, seq2):
      """Measures the sequence similarity between two sequences of observations and actions.

@@ -82,19 +90,22 @@ def measure_average_sequence_distance(seq1, seq2):

      # Ensure both sequences have the same length
      min_seq_len = np.min([len(seq1), len(seq2)])
-     assert np.max([len(seq1), len(seq2)]) <= 30*min_seq_len, "We can't really measure similarity in case the sequences are really not the same... maybe just return a default NOT_SIMILAR here."
+     assert (
+         np.max([len(seq1), len(seq2)]) <= 30 * min_seq_len
+     ), "We can't really measure similarity in case the sequences are really not the same... maybe just return a default NOT_SIMILAR here."

      # Calculate the Euclidean distance between corresponding elements in the sequences
      distances = []
      for i in range(0, min_seq_len):
-         distances.append(np.sum(np.abs(np.array(seq1[i])-np.array(seq2[i]))))
+         distances.append(np.sum(np.abs(np.array(seq1[i]) - np.array(seq2[i]))))

      # Calculate the average distance over all elements
      return np.mean(np.array(distances))


- def traj_to_policy(observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.) -> Dict[
-     str, List[float]]:
+ def traj_to_policy(
+     observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.0
+ ) -> Dict[str, List[float]]:
      # converts a trajectory from a planner to a policy
      # where the taken action has 99.99999% probability
      trajectory_as_policy = {}
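
The per-step term in measure_average_sequence_distance is an L1 distance between corresponding elements; a tiny worked example with illustrative three-step sequences:

    import numpy as np

    seq1 = [[0, 0], [1, 1], [2, 2]]
    seq2 = [[0, 1], [1, 3], [2, 2]]
    per_step = [np.sum(np.abs(np.array(a) - np.array(b))) for a, b in zip(seq1, seq2)]
    print(np.mean(per_step))  # (1 + 2 + 0) / 3 = 1.0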
@@ -104,22 +115,28 @@ def traj_to_policy(observations: List[Tuple[State, Any]], actions: Discrete, eps

          actions_len = actions.n
          qs = [1e-6 + epsilon / actions_len for _ in range(actions_len)]
-         qs[action_index] = 1. - 1e-6 * (actions_len - 1) - epsilon
+         qs[action_index] = 1.0 - 1e-6 * (actions_len - 1) - epsilon

-         state = observation['image']
+         state = observation["image"]
          state_pickled = dill.dumps(state)
          trajectory_as_policy[state_pickled] = qs
      return trajectory_as_policy

- def pass_observation_patcher(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
+
+ def pass_observation_patcher(
+     observations: List[Any], agent: RLAgent
+ ) -> Generator[None, None, None]:
      for observation in observations:
          yield observation

+
  def mean_wasserstein_distance(
-     observations: List[Tuple[State, Any]],
-     agent: DeepRLAgent,
-     actions: gymnasium.spaces.Box,
-     observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
+     observations: List[Tuple[State, Any]],
+     agent: DeepRLAgent,
+     actions: gymnasium.spaces.Box,
+     observation_patcher: Callable[
+         [List[Any], RLAgent], Generator[None, None, None]
+     ] = pass_observation_patcher,
  ):
      distances = []

@@ -141,22 +158,28 @@ def mean_wasserstein_distance(
              wasserstein_distances.append(
                  wasserstein_distance([observation_action], [actor_mean])
              )
-         distances.append(mean(wasserstein_distances))
-     return mean(distances)
+         distances.append(np.mean(wasserstein_distances))
+     return np.mean(distances)


- def mean_action_distance_continuous(observations: List[Tuple[State, Any]], agent: DeepRLAgent, actions: gymnasium.spaces.Box):
+ def mean_action_distance_continuous(
+     observations: List[Tuple[State, Any]],
+     agent: DeepRLAgent,
+     actions: gymnasium.spaces.Box,
+ ):
      distances = []
      for observation, action in observations:
          action2, _ = agent.model.predict(
              observation,
              state=None,
              deterministic=True,
-             episode_start=np.ones((1,), dtype=bool)
+             episode_start=np.ones((1,), dtype=bool),
          )
          action_arr, action2_arr = action[0], action2[0]
          print(f"actor means:{action2}")
-         assert len(action_arr) == len(action2_arr), f"Actions should be on the same length:{action},{action2}"
+         assert len(action_arr) == len(
+             action2_arr
+         ), f"Actions should be on the same length:{action},{action2}"

          total_diff = 0
          # total_diff = []
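
The inner loop in mean_wasserstein_distance above compares a single observed action component against a single actor mean, in which case SciPy's Wasserstein distance reduces to the absolute difference:

    from scipy.stats import wasserstein_distance

    print(wasserstein_distance([0.3], [0.8]))  # 0.5, i.e. |0.3 - 0.8|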
@@ -165,24 +188,29 @@ def mean_action_distance_continuous(observations: List[Tuple[State, Any]], agent
          # distances.append(statistics.mean(total_diff))
          distances.append(total_diff)
      # print(f"distances:{distances}")
-     return mean(distances)
+     return np.mean(distances)


- def set_agent_goal_observation(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
+ def set_agent_goal_observation(
+     observations: List[Any], agent: RLAgent
+ ) -> Generator[None, None, None]:
      copy_observation = observations.copy()
      for observation, action in copy_observation:
-         observation['desired_goal'] = agent.goal
+         observation["desired_goal"] = agent.goal
          yield observation, action


  def z_score(x, mean_action: float, std_dev: float):
      return (x - mean_action) / std_dev

+
  def mean_p_value(
-     observations: List[Tuple[State, Any]],
-     agent: DeepRLAgent,
-     actions: gymnasium.spaces.Box,
-     observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
+     observations: List[Tuple[State, Any]],
+     agent: DeepRLAgent,
+     actions: gymnasium.spaces.Box,
+     observation_patcher: Callable[
+         [List[Any], RLAgent], Generator[None, None, None]
+     ] = pass_observation_patcher,
  ):
      distances = []
      for observation, observed_action in observation_patcher(observations, agent):
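
mean_p_value averages absolute z-scores per action dimension, using 2**|log_std| as the spread passed to z_score; the numbers below are illustrative:

    import math

    def z_score(x, mean_action, std_dev):
        # same logic as the helper in the hunk above
        return (x - mean_action) / std_dev

    observed, actor_mean, log_std = 0.8, 0.5, -1.0
    print(math.fabs(z_score(observed, actor_mean, math.pow(2, math.fabs(log_std)))))  # 0.15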
@@ -194,25 +222,39 @@ def mean_p_value(
          observed_actions = observed_action[0]
          log_std_dev = log_std_dev[0]

-         if len(actor_means) != len(observed_actions) or len(actor_means) != len(log_std_dev) or len(observed_actions) != len(log_std_dev):
+         if (
+             len(actor_means) != len(observed_actions)
+             or len(actor_means) != len(log_std_dev)
+             or len(observed_actions) != len(log_std_dev)
+         ):
              raise Exception(
                  f"Length of observed actions, actor mean and std-dev should be equal! "
                  f"{len(observed_actions)},{len(actor_means)},{len(log_std_dev)}"
              )
          z_scores = []
-         for actor_mean, observation_action, action_log_std_dev in zip(actor_means, observed_actions, log_std_dev):
+         for actor_mean, observation_action, action_log_std_dev in zip(
+             actor_means, observed_actions, log_std_dev
+         ):
              z_scores.append(
-                 math.fabs(z_score(observation_action, actor_mean, math.pow(2, math.fabs(action_log_std_dev))))
+                 math.fabs(
+                     z_score(
+                         observation_action,
+                         actor_mean,
+                         math.pow(2, math.fabs(action_log_std_dev)),
+                     )
+                 )
              )
-         mean_distances = mean(z_scores)
+         mean_distances = np.mean(z_scores)

          distances.append(mean_distances)
-     return mean(distances)
+     return np.mean(distances)
+

  def normalize(values: List[float]) -> List[float]:
      values /= sum(values)
      return values

+
  def max(values: List[float]) -> List[float]:
      if not len(values):
          return values
@@ -220,4 +262,4 @@ def max(values: List[float]) -> List[float]:
      argmax = vals.argmax()
      vals[:] = 0.0
      vals[argmax] = 1.0
-     return vals
+     return vals
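
For reference, normalize() rescales a score vector to sum to one and the module-level max() (which shadows the builtin) one-hots the argmax; a NumPy sketch of the same operations on illustrative scores:

    import numpy as np

    vals = np.array([0.2, 0.7, 0.1])
    probs = vals / vals.sum()        # what normalize() computes
    one_hot = np.zeros_like(probs)
    one_hot[probs.argmax()] = 1.0    # the one-hot vector max() returns
    print(probs, one_hot)            # [0.2 0.7 0.1] [0. 1. 0.]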
gr_libs/ml/__init__.py CHANGED
@@ -1,6 +1,8 @@
  from ..ml.utils import device, seed, synthesize
+
  # from ml.neural import PPOAlgo
  from ..ml.tabular import TabularQLearner
+
  # from ml.neural import ACModel, RecurrentACModel
  from ..ml.neural import DictList
  from ..ml.agent import Agent
gr_libs/ml/agent.py CHANGED
@@ -2,6 +2,7 @@ import torch

  from gr_libs.ml import utils
  from gr_libs.ml.utils.other import device
+
  # from ml.neural import ACModel


@@ -12,15 +13,27 @@ class Agent:
      - to choose an action given an observation,
      - to analyze the feedback (i.e. reward and done state) of its action."""

-     def __init__(self, obs_space, action_space, model_dir,
-                  argmax=False, num_envs=1, use_memory=True, use_text=False):
+     def __init__(
+         self,
+         obs_space,
+         action_space,
+         model_dir,
+         argmax=False,
+         num_envs=1,
+         use_memory=True,
+         use_text=False,
+     ):
          obs_space, self.preprocess_obss = utils.get_obss_preprocessor(obs_space)
-         self.acmodel = ACModel(obs_space, action_space, use_memory=use_memory, use_text=use_text)
+         self.acmodel = ACModel(
+             obs_space, action_space, use_memory=use_memory, use_text=use_text
+         )
          self.argmax = argmax
          self.num_envs = num_envs

          if self.acmodel.recurrent:
-             self.memories = torch.zeros(self.num_envs, self.acmodel.memory_size, device=device)
+             self.memories = torch.zeros(
+                 self.num_envs, self.acmodel.memory_size, device=device
+             )

          self.acmodel.load_state_dict(utils.get_model_state(model_dir))
          self.acmodel.to(device)
@@ -49,8 +62,10 @@ class Agent:

      def analyze_feedbacks(self, rewards, dones):
          if self.acmodel.recurrent:
-             masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(1)
+             masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(
+                 1
+             )
              self.memories *= masks

      def analyze_feedback(self, reward, done):
-         return self.analyze_feedbacks([reward], [done])
+         return self.analyze_feedbacks([reward], [done])
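
The wrapped unsqueeze(1) call above builds a per-environment mask from the done flags so that finished environments get their recurrent memories zeroed; a small illustration with made-up shapes:

    import torch

    memories = torch.ones(2, 4)  # num_envs=2, memory_size=4
    dones = [True, False]
    masks = 1 - torch.tensor(dones, dtype=torch.float).unsqueeze(1)  # shape (2, 1)
    memories *= masks
    print(memories)  # first row zeroed, second row unchanged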
gr_libs/ml/base/__init__.py CHANGED
@@ -1 +1 @@
- from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
+ from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
gr_libs/ml/base/rl_agent.py CHANGED
@@ -4,22 +4,24 @@ import numpy as np

  State = Any

+
  class ContextualAgent:
      def __init__(self, problem_name, problem_goal, agent):
          self.problem_name = problem_name
          self.problem_goal = problem_goal
          self.agent = agent

+
  class RLAgent(ABC):
      def __init__(
-         self,
-         episodes: int,
-         decaying_eps: bool,
-         epsilon: float,
-         learning_rate: float,
-         gamma: float,
-         problem_name: str,
-         domain_name: str
+         self,
+         episodes: int,
+         decaying_eps: bool,
+         epsilon: float,
+         learning_rate: float,
+         gamma: float,
+         problem_name: str,
+         domain_name: str,
      ):
          self.episodes = episodes
          self.decaying_eps = decaying_eps
@@ -46,9 +48,10 @@ class RLAgent(ABC):

      def update_states_counter(self, observation_str: str):
          if observation_str in self.states_counter:
-             self.states_counter[observation_str] = self.states_counter[observation_str] + 1
+             self.states_counter[observation_str] = (
+                 self.states_counter[observation_str] + 1
+             )
          else:
              self.states_counter[observation_str] = 1
          if len(self.states_counter) % 10000 == 0:
              print(f"probably error to many {len(self.states_counter)}")
-
gr_libs/ml/consts.py CHANGED
@@ -19,4 +19,4 @@ OPTIM_EPS = 1e-8
  OPTIM_ALPHA = 0.99
  CLIP_EPS = 0.2
  RECURRENCE = 1
- TEXT = False
+ TEXT = False