gr-libs 0.1.8__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
Files changed (68)
  1. gr_libs/__init__.py +3 -1
  2. gr_libs/_evaluation/__init__.py +1 -0
  3. evaluation/analyze_results_cross_alg_cross_domain.py → gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +81 -88
  4. evaluation/generate_experiments_results.py → gr_libs/_evaluation/_generate_experiments_results.py +6 -6
  5. evaluation/generate_task_specific_statistics_plots.py → gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +11 -14
  6. evaluation/get_plans_images.py → gr_libs/_evaluation/_get_plans_images.py +3 -4
  7. evaluation/increasing_and_decreasing_.py → gr_libs/_evaluation/_increasing_and_decreasing_.py +3 -1
  8. gr_libs/_version.py +2 -2
  9. gr_libs/all_experiments.py +294 -0
  10. gr_libs/environment/__init__.py +14 -1
  11. gr_libs/environment/{utils → _utils}/utils.py +1 -1
  12. gr_libs/environment/environment.py +257 -22
  13. gr_libs/metrics/__init__.py +2 -0
  14. gr_libs/metrics/metrics.py +166 -31
  15. gr_libs/ml/__init__.py +1 -6
  16. gr_libs/ml/base/__init__.py +3 -1
  17. gr_libs/ml/base/rl_agent.py +68 -3
  18. gr_libs/ml/neural/__init__.py +1 -3
  19. gr_libs/ml/neural/deep_rl_learner.py +227 -67
  20. gr_libs/ml/neural/utils/__init__.py +1 -2
  21. gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +1 -1
  22. gr_libs/ml/planner/mcts/mcts_model.py +71 -34
  23. gr_libs/ml/sequential/__init__.py +0 -1
  24. gr_libs/ml/sequential/{lstm_model.py → _lstm_model.py} +11 -14
  25. gr_libs/ml/tabular/__init__.py +1 -3
  26. gr_libs/ml/tabular/tabular_q_learner.py +27 -9
  27. gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
  28. gr_libs/ml/utils/__init__.py +2 -9
  29. gr_libs/ml/utils/format.py +13 -90
  30. gr_libs/ml/utils/math.py +3 -2
  31. gr_libs/ml/utils/other.py +2 -2
  32. gr_libs/ml/utils/storage.py +41 -94
  33. gr_libs/odgr_executor.py +268 -0
  34. gr_libs/problems/consts.py +2 -2
  35. gr_libs/recognizer/_utils/__init__.py +0 -0
  36. gr_libs/recognizer/{utils → _utils}/format.py +2 -2
  37. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +116 -36
  38. gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py} +11 -11
  39. gr_libs/recognizer/graml/graml_recognizer.py +172 -29
  40. gr_libs/recognizer/recognizer.py +59 -10
  41. gr_libs/tutorials/draco_panda_tutorial.py +58 -0
  42. gr_libs/tutorials/draco_parking_tutorial.py +56 -0
  43. {tutorials → gr_libs/tutorials}/gcdraco_panda_tutorial.py +5 -9
  44. {tutorials → gr_libs/tutorials}/gcdraco_parking_tutorial.py +3 -7
  45. {tutorials → gr_libs/tutorials}/graml_minigrid_tutorial.py +2 -2
  46. {tutorials → gr_libs/tutorials}/graml_panda_tutorial.py +5 -10
  47. {tutorials → gr_libs/tutorials}/graml_parking_tutorial.py +5 -9
  48. {tutorials → gr_libs/tutorials}/graml_point_maze_tutorial.py +2 -1
  49. {tutorials → gr_libs/tutorials}/graql_minigrid_tutorial.py +2 -2
  50. {gr_libs-0.1.8.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
  51. gr_libs-0.2.2.dist-info/RECORD +71 -0
  52. {gr_libs-0.1.8.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
  53. gr_libs-0.2.2.dist-info/top_level.txt +2 -0
  54. tests/test_draco.py +14 -0
  55. tests/test_gcdraco.py +2 -2
  56. tests/test_graml.py +4 -4
  57. tests/test_graql.py +1 -1
  58. evaluation/create_minigrid_map_image.py +0 -38
  59. evaluation/file_system.py +0 -53
  60. evaluation/generate_experiments_results_new_ver1.py +0 -238
  61. evaluation/generate_experiments_results_new_ver2.py +0 -331
  62. gr_libs/ml/neural/utils/penv.py +0 -60
  63. gr_libs/recognizer/utils/__init__.py +0 -1
  64. gr_libs-0.1.8.dist-info/RECORD +0 -70
  65. gr_libs-0.1.8.dist-info/top_level.txt +0 -4
  66. /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
  67. /gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +0 -0
  68. /gr_libs/ml/planner/mcts/{utils → _utils}/node.py +0 -0
gr_libs/metrics/metrics.py CHANGED
@@ -1,23 +1,24 @@
+""" metrics for GR algorithms, to perform distance, similarity, likelihood and other measurements and metrics. """
+
 import math
+from collections.abc import Callable, Generator
+from math import log2
+from typing import Any
+
 import dill
 import gymnasium
 import numpy as np
-
-from typing import Callable, Generator, List, Dict, Tuple, Any
-from math import log2
-from scipy.stats import wasserstein_distance
 from gymnasium.spaces.discrete import Discrete
-
-# import torch
-# from torch.distributions.categorical import Categorical
+from scipy.stats import wasserstein_distance

 from ..ml.base import State
 from ..ml.base.rl_agent import RLAgent
 from ..ml.neural.deep_rl_learner import DeepRLAgent


-def kl_divergence(p1: List[float], p2: List[float]) -> float:
-    """Computes Kullback–Leibler divergence from two probabilities distributions p1 and p2.
+def kl_divergence(p1: list[float], p2: list[float]) -> float:
+    """
+    Computes Kullback–Leibler divergence from two probabilities distributions p1 and p2.

     We follow the formula in Wikipedia https://en.wikipedia.org/wiki/Kullback–Leibler_divergence

     Args:
@@ -32,8 +33,19 @@ def kl_divergence(p1: List[float], p2: List[float]) -> float:


 def kl_divergence_norm_softmax(
-    observations: List[Tuple[State, Any]], agent, actions: Discrete
+    observations: list[tuple[State, Any]], agent, actions: Discrete
 ):
+    """
+    Calculates the Kullback-Leibler (KL) divergence between two probability distributions.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent: The agent object.
+        actions: The discrete actions.
+
+    Returns:
+        float: The mean KL divergence between the two distributions.
+    """
     distances = []
     p_traj = traj_to_policy(observations=observations, actions=actions)

@@ -42,7 +54,7 @@ def kl_divergence_norm_softmax(
         state_pickled = dill.dumps(state)

         qp1 = p_traj[state_pickled]
-        qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
+        qp2_flatten_distribution_list: list[float] = agent.get_actions_probabilities(
             observation=(observation, agent_pos)
         )
         distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
@@ -62,6 +74,17 @@ def amplify(values, alpha=1.0):


 def stochastic_amplified_selection(actions_probs, alpha=8.0):
+    """
+    Selects an action based on the given action probabilities, with amplification using the specified alpha value.
+
+    Parameters:
+        actions_probs (list): A list of action probabilities.
+        alpha (float): Amplification factor (default: 8.0).
+
+    Returns:
+        int: The selected action.
+
+    """
     action_probs_amplified = amplify(actions_probs, alpha)
     choice = np.random.choice(len(action_probs_amplified), p=action_probs_amplified)
     if choice == 3:
@@ -69,11 +92,32 @@ def stochastic_amplified_selection(actions_probs, alpha=8.0):
     return choice


+import numpy as np
+
+
 def stochastic_selection(actions_probs):
+    """
+    Selects an action based on the given probabilities using a stochastic selection method.
+
+    Parameters:
+        actions_probs (list): A list of probabilities for each action.
+
+    Returns:
+        int: The index of the selected action.
+    """
     return np.random.choice(len(actions_probs), p=actions_probs)


 def greedy_selection(actions_probs):
+    """
+    Selects the action with the highest probability.
+
+    Args:
+        actions_probs (numpy.ndarray): Array of action probabilities.
+
+    Returns:
+        int: Index of the selected action.
+    """
     return np.argmax(actions_probs)


@@ -104,13 +148,21 @@ def measure_average_sequence_distance(seq1, seq2):


 def traj_to_policy(
-    observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.0
-) -> Dict[str, List[float]]:
-    # converts a trajectory from a planner to a policy
-    # where the taken action has 99.99999% probability
+    observations: list[tuple[State, Any]], actions: Discrete, epsilon: float = 0.0
+) -> dict[str, list[float]]:
+    """
+    Converts a trajectory from a planner to a policy.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of tuples containing the observation and the corresponding action.
+        actions (Discrete): Discrete action space.
+        epsilon (float, optional): Exploration parameter. Defaults to 0.0.
+
+    Returns:
+        dict[str, list[float]]: Dictionary mapping serialized states to action probabilities.
+    """
     trajectory_as_policy = {}
-    for (observation, agent_pos), action in observations:
-        # in the discrete world the action is the index
+    for (observation, _agent_pos), action in observations:
         action_index = action

         actions_len = actions.n
@@ -123,21 +175,48 @@ def traj_to_policy(
     return trajectory_as_policy


+from collections.abc import Generator
+from typing import Any
+
+
 def pass_observation_patcher(
-    observations: List[Any], agent: RLAgent
+    observations: list[Any], agent: RLAgent
 ) -> Generator[None, None, None]:
-    for observation in observations:
-        yield observation
+    """
+    Generator function that yields observations.
+
+    Args:
+        observations (list): List of observations.
+        agent (RLAgent): RL agent object.
+
+    Yields:
+        None: Yields each observation from the list.
+
+    """
+    yield from observations


 def mean_wasserstein_distance(
-    observations: List[Tuple[State, Any]],
+    observations: list[tuple[State, Any]],
     agent: DeepRLAgent,
     actions: gymnasium.spaces.Box,
     observation_patcher: Callable[
-        [List[Any], RLAgent], Generator[None, None, None]
+        [list[Any], RLAgent], Generator[None, None, None]
     ] = pass_observation_patcher,
 ):
+    """
+    Calculates the mean Wasserstein distance between observed actions and actor means.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent.
+        actions (gymnasium.spaces.Box): The action space.
+        observation_patcher (Callable[[list[Any], RLAgent], Generator[None, None, None]], optional):
+            A function that patches the observations. Defaults to pass_observation_patcher.
+
+    Returns:
+        float: The mean Wasserstein distance between observed actions and actor means.
+    """
     distances = []

     for observation, observed_action in observation_patcher(observations, agent):
@@ -163,10 +242,21 @@ def mean_wasserstein_distance(


 def mean_action_distance_continuous(
-    observations: List[Tuple[State, Any]],
+    observations: list[tuple[State, Any]],
     agent: DeepRLAgent,
     actions: gymnasium.spaces.Box,
 ):
+    """
+    Calculates the mean distance between the predicted actions and the actual actions for a continuous action space.
+
+    Args:
+        observations (list[tuple[State, Any]]): A list of tuples containing the observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent used to predict actions.
+        actions (gymnasium.spaces.Box): The action space.
+
+    Returns:
+        float: The mean distance between the predicted actions and the actual actions.
+    """
     distances = []
     for observation, action in observations:
         action2, _ = agent.model.predict(
@@ -182,18 +272,29 @@ def mean_action_distance_continuous(
         ), f"Actions should be on the same length:{action},{action2}"

         total_diff = 0
-        # total_diff = []
         for action1, action2 in zip(action_arr, action2_arr):
             total_diff += math.fabs(action1 - action2)
-            # distances.append(statistics.mean(total_diff))
         distances.append(total_diff)
-    # print(f"distances:{distances}")
     return np.mean(distances)


+from collections.abc import Generator
+from typing import Any
+
+
 def set_agent_goal_observation(
-    observations: List[Any], agent: RLAgent
+    observations: list[Any], agent: RLAgent
 ) -> Generator[None, None, None]:
+    """
+    Sets the desired goal in each observation to the agent's goal.
+
+    Args:
+        observations (list): List of observations.
+        agent (RLAgent): The RL agent.
+
+    Yields:
+        tuple: A tuple containing the modified observation and the corresponding action.
+    """
     copy_observation = observations.copy()
     for observation, action in copy_observation:
         observation["desired_goal"] = agent.goal
@@ -205,13 +306,29 @@ def z_score(x, mean_action: float, std_dev: float):


 def mean_p_value(
-    observations: List[Tuple[State, Any]],
+    observations: list[tuple[State, Any]],
     agent: DeepRLAgent,
     actions: gymnasium.spaces.Box,
     observation_patcher: Callable[
-        [List[Any], RLAgent], Generator[None, None, None]
+        [list[Any], RLAgent], Generator[None, None, None]
     ] = pass_observation_patcher,
 ):
+    """
+    Calculate the mean p-value for a given set of observations.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent.
+        actions (gymnasium.spaces.Box): The action space.
+        observation_patcher (Callable[[list[Any], RLAgent], Generator[None, None, None]], optional):
+            A function that patches the observations. Defaults to pass_observation_patcher.
+
+    Returns:
+        float: The mean p-value.
+
+    Raises:
+        Exception: If the lengths of observed actions, actor mean, and std-dev are not equal.
+    """
     distances = []
     for observation, observed_action in observation_patcher(observations, agent):
         # execute prediction X times and add to list (observed_action * X) |X| Len
@@ -250,12 +367,30 @@ def mean_p_value(
     return np.mean(distances)


-def normalize(values: List[float]) -> List[float]:
+def normalize(values: list[float]) -> list[float]:
+    """
+    Normalize a list of values by dividing each value by the sum of all values.
+
+    Args:
+        values (list[float]): The list of values to be normalized.
+
+    Returns:
+        list[float]: The normalized list of values.
+    """
     values /= sum(values)
     return values


-def max(values: List[float]) -> List[float]:
+def maximum(values: list[float]) -> list[float]:
+    """
+    Returns a list with the same length as the input list, where the maximum value is set to 1.0 and all other values are set to 0.0.
+
+    Args:
+        values (list[float]): The input list of values.
+
+    Returns:
+        list[float]: A list with the same length as the input list, where the maximum value is set to 1.0 and all other values are set to 0.0.
+    """
     if not len(values):
         return values
     vals = np.array(values)
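Note: the hunks above show only signatures and docstrings for the KL-based metrics, not the function bodies. As a point of reference, here is a minimal sketch of the cited formula (KL divergence computed with log2, per the Wikipedia link in the docstring) and of the kind of inputs kl_divergence_norm_softmax feeds it. The helper name and the probability values are illustrative only, not the released implementation.

from math import log2

def kl_divergence_sketch(p1, p2):
    # D_KL(P || Q) = sum_i p_i * log2(p_i / q_i) -- the formula the docstring cites.
    # Terms with p_i == 0 contribute nothing and are skipped.
    return sum(p * log2(p / q) for p, q in zip(p1, p2) if p > 0)

# Illustrative values: a near-deterministic planner policy (as built by
# traj_to_policy) compared against a uniform agent policy
# (as returned by agent.get_actions_probabilities).
planner_policy = [0.9999, 0.00005, 0.00005]
agent_policy = [1 / 3, 1 / 3, 1 / 3]
print(kl_divergence_sketch(planner_policy, agent_policy))  # ~1.58 bits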
gr_libs/ml/__init__.py CHANGED
@@ -1,8 +1,3 @@
-from ..ml.utils import device, seed, synthesize
+# from ml.neural import ACModel, RecurrentACModel

 # from ml.neural import PPOAlgo
-from ..ml.tabular import TabularQLearner
-
-# from ml.neural import ACModel, RecurrentACModel
-from ..ml.neural import DictList
-from ..ml.agent import Agent
gr_libs/ml/base/__init__.py CHANGED
@@ -1 +1,3 @@
-from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
+""" base ML classes for other modules to extend. """
+
+from gr_libs.ml.base.rl_agent import ContextualAgent, RLAgent, State
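Since the one-line module above now re-exports the base classes, they can be imported directly from gr_libs.ml.base. A minimal usage sketch follows; the problem name, goal, and agent object are hypothetical placeholders.

from gr_libs.ml.base import ContextualAgent

# Placeholders for illustration only; real problem names, goals, and agents
# come from the rest of the library.
wrapped = ContextualAgent(
    problem_name="example_problem",
    problem_goal="example_goal",
    agent=object(),
)
print(wrapped.problem_name, wrapped.problem_goal)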
gr_libs/ml/base/rl_agent.py CHANGED
@@ -1,12 +1,33 @@
-from typing import Any
 from abc import ABC, abstractmethod
-import numpy as np
+from typing import Any

 State = Any


 class ContextualAgent:
+    """
+    A class representing a contextual agent for reinforcement learning, including gym properties.
+
+    Args:
+        problem_name (str): The name of the problem the agent is designed to solve.
+        problem_goal (str): The goal of the problem the agent is designed to achieve.
+        agent: The underlying agent implementation.
+
+    Attributes:
+        problem_name (str): The name of the problem the agent is designed to solve.
+        problem_goal (str): The goal of the problem the agent is designed to achieve.
+        agent: The underlying agent implementation.
+    """
+
     def __init__(self, problem_name, problem_goal, agent):
+        """
+        Initializes a reinforcement learning agent.
+
+        Args:
+            problem_name (str): The name of the problem.
+            problem_goal (str): The goal of the problem.
+            agent: The agent object.
+        """
         self.problem_name = problem_name
         self.problem_goal = problem_goal
         self.agent = agent
@@ -23,6 +44,18 @@ class RLAgent(ABC):
         problem_name: str,
         domain_name: str,
     ):
+        """
+        Initializes a reinforcement learning agent.
+
+        Args:
+            episodes (int): The number of episodes to train the agent.
+            decaying_eps (bool): Whether to use decaying epsilon-greedy exploration.
+            epsilon (float): The exploration rate.
+            learning_rate (float): The learning rate.
+            gamma (float): The discount factor.
+            problem_name (str): The name of the problem.
+            domain_name (str): The name of the domain.
+        """
         self.episodes = episodes
         self.decaying_eps = decaying_eps
         self.epsilon = epsilon
@@ -35,18 +68,50 @@ class RLAgent(ABC):

     @abstractmethod
     def learn(self):
-        pass
+        """
+        Abstract method for the agent to learn from the environment.
+        """

     def class_name(self):
+        """
+        Returns the name of the agent's class.
+
+        Returns:
+            str: The name of the agent's class.
+        """
         return self.__class__.__name__

     def get_actions_probabilities(self, observation):
+        """
+        Get the probabilities of available actions given an observation.
+
+        Args:
+            observation: The observation from the environment.
+
+        Raises:
+            Exception: This function is unimplemented.
+
+        Returns:
+            Any: The probabilities of available actions.
+        """
         raise Exception("function get_actions_probabilities is unimplemented")

     def get_number_of_unique_states(self):
+        """
+        Get the number of unique states encountered by the agent.
+
+        Returns:
+            int: The number of unique states encountered.
+        """
         return len(self.states_counter)

     def update_states_counter(self, observation_str: str):
+        """
+        Update the counter for the number of times each observation state is encountered.
+
+        Args:
+            observation_str (str): The string representation of the observation state.
+        """
         if observation_str in self.states_counter:
             self.states_counter[observation_str] = (
                 self.states_counter[observation_str] + 1
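The new docstrings spell out the constructor contract for RLAgent subclasses. Below is a minimal sketch of a concrete subclass written against that contract; the class name, the argument values, and the empty learn body are hypothetical, and the sketch assumes the base __init__ accepts exactly the documented arguments.

from gr_libs.ml.base import RLAgent

class ExampleAgent(RLAgent):
    """Hypothetical subclass illustrating the documented constructor arguments."""

    def __init__(self):
        super().__init__(
            episodes=100,          # number of training episodes
            decaying_eps=True,     # decay epsilon-greedy exploration over time
            epsilon=1.0,           # initial exploration rate
            learning_rate=0.1,
            gamma=0.99,            # discount factor
            problem_name="example_problem",
            domain_name="example_domain",
        )

    def learn(self):
        # A real agent would interact with its environment here; the abstract
        # method in the diff above documents the expectation without a body.
        pass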
gr_libs/ml/neural/__init__.py CHANGED
@@ -1,3 +1 @@
-# from ml.neural.model import AbstractACModel, RecurrentACModel, ACModel
-# from ml.neural.algorithms import BaseAlgo, A2CAlgo, PPOAlgo
-from gr_libs.ml.neural.utils import DictList
+""" Algorithms that involve using neural networks. """