gr-libs 0.1.7.post0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. gr_libs/__init__.py +4 -1
  2. gr_libs/_evaluation/__init__.py +1 -0
  3. gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +260 -0
  4. gr_libs/_evaluation/_generate_experiments_results.py +141 -0
  5. gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +497 -0
  6. gr_libs/_evaluation/_get_plans_images.py +61 -0
  7. gr_libs/_evaluation/_increasing_and_decreasing_.py +106 -0
  8. gr_libs/_version.py +2 -2
  9. gr_libs/all_experiments.py +294 -0
  10. gr_libs/environment/__init__.py +30 -9
  11. gr_libs/environment/_utils/utils.py +27 -0
  12. gr_libs/environment/environment.py +417 -54
  13. gr_libs/metrics/__init__.py +7 -0
  14. gr_libs/metrics/metrics.py +231 -54
  15. gr_libs/ml/__init__.py +2 -5
  16. gr_libs/ml/agent.py +21 -6
  17. gr_libs/ml/base/__init__.py +3 -1
  18. gr_libs/ml/base/rl_agent.py +81 -13
  19. gr_libs/ml/consts.py +1 -1
  20. gr_libs/ml/neural/__init__.py +1 -3
  21. gr_libs/ml/neural/deep_rl_learner.py +619 -378
  22. gr_libs/ml/neural/utils/__init__.py +1 -2
  23. gr_libs/ml/neural/utils/dictlist.py +3 -3
  24. gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +1 -1
  25. gr_libs/ml/planner/mcts/{utils → _utils}/node.py +11 -7
  26. gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +15 -11
  27. gr_libs/ml/planner/mcts/mcts_model.py +571 -312
  28. gr_libs/ml/sequential/__init__.py +0 -1
  29. gr_libs/ml/sequential/_lstm_model.py +270 -0
  30. gr_libs/ml/tabular/__init__.py +1 -3
  31. gr_libs/ml/tabular/state.py +7 -7
  32. gr_libs/ml/tabular/tabular_q_learner.py +150 -82
  33. gr_libs/ml/tabular/tabular_rl_agent.py +42 -28
  34. gr_libs/ml/utils/__init__.py +2 -3
  35. gr_libs/ml/utils/format.py +28 -97
  36. gr_libs/ml/utils/math.py +5 -3
  37. gr_libs/ml/utils/other.py +3 -3
  38. gr_libs/ml/utils/storage.py +88 -81
  39. gr_libs/odgr_executor.py +268 -0
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/_utils/__init__.py +0 -0
  42. gr_libs/recognizer/_utils/format.py +18 -0
  43. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +233 -88
  44. gr_libs/recognizer/graml/_gr_dataset.py +233 -0
  45. gr_libs/recognizer/graml/graml_recognizer.py +586 -252
  46. gr_libs/recognizer/recognizer.py +90 -30
  47. gr_libs/tutorials/draco_panda_tutorial.py +58 -0
  48. gr_libs/tutorials/draco_parking_tutorial.py +56 -0
  49. gr_libs/tutorials/gcdraco_panda_tutorial.py +62 -0
  50. gr_libs/tutorials/gcdraco_parking_tutorial.py +57 -0
  51. gr_libs/tutorials/graml_minigrid_tutorial.py +64 -0
  52. gr_libs/tutorials/graml_panda_tutorial.py +57 -0
  53. gr_libs/tutorials/graml_parking_tutorial.py +52 -0
  54. gr_libs/tutorials/graml_point_maze_tutorial.py +60 -0
  55. gr_libs/tutorials/graql_minigrid_tutorial.py +50 -0
  56. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
  57. gr_libs-0.2.2.dist-info/RECORD +71 -0
  58. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
  59. gr_libs-0.2.2.dist-info/top_level.txt +2 -0
  60. tests/test_draco.py +14 -0
  61. tests/test_gcdraco.py +10 -0
  62. tests/test_graml.py +12 -8
  63. tests/test_graql.py +3 -2
  64. evaluation/analyze_results_cross_alg_cross_domain.py +0 -277
  65. evaluation/create_minigrid_map_image.py +0 -34
  66. evaluation/file_system.py +0 -42
  67. evaluation/generate_experiments_results.py +0 -92
  68. evaluation/generate_experiments_results_new_ver1.py +0 -254
  69. evaluation/generate_experiments_results_new_ver2.py +0 -331
  70. evaluation/generate_task_specific_statistics_plots.py +0 -272
  71. evaluation/get_plans_images.py +0 -47
  72. evaluation/increasing_and_decreasing_.py +0 -63
  73. gr_libs/environment/utils/utils.py +0 -17
  74. gr_libs/ml/neural/utils/penv.py +0 -57
  75. gr_libs/ml/sequential/lstm_model.py +0 -192
  76. gr_libs/recognizer/graml/gr_dataset.py +0 -134
  77. gr_libs/recognizer/utils/__init__.py +0 -1
  78. gr_libs/recognizer/utils/format.py +0 -13
  79. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  80. gr_libs-0.1.7.post0.dist-info/top_level.txt +0 -4
  81. tutorials/graml_minigrid_tutorial.py +0 -34
  82. tutorials/graml_panda_tutorial.py +0 -41
  83. tutorials/graml_parking_tutorial.py +0 -39
  84. tutorials/graml_point_maze_tutorial.py +0 -39
  85. tutorials/graql_minigrid_tutorial.py +0 -34
  86. /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
gr_libs/metrics/metrics.py CHANGED
@@ -1,22 +1,24 @@
+""" metrics for GR algorithms, to perform distance, similarity, likelihood and other measurements and metrics. """
+
 import math
+from collections.abc import Callable, Generator
+from math import log2
+from typing import Any
+
 import dill
 import gymnasium
 import numpy as np
-
-from typing import Callable, Generator, List, Dict, Tuple, Any
-from math import log2
-from scipy.stats import wasserstein_distance
 from gymnasium.spaces.discrete import Discrete
-# import torch
-# from torch.distributions.categorical import Categorical
+from scipy.stats import wasserstein_distance

 from ..ml.base import State
 from ..ml.base.rl_agent import RLAgent
 from ..ml.neural.deep_rl_learner import DeepRLAgent


-def kl_divergence(p1: List[float], p2: List[float]) -> float:
-    """Computes Kullback–Leibler divergence from two probabilities distributions p1 and p2.
+def kl_divergence(p1: list[float], p2: list[float]) -> float:
+    """
+    Computes Kullback–Leibler divergence from two probabilities distributions p1 and p2.
     We follow the formula in Wikipedia https://en.wikipedia.org/wiki/Kullback–Leibler_divergence

     Args:
@@ -26,21 +28,35 @@ def kl_divergence(p1: List[float], p2: List[float]) -> float:
     Returns:
         float: The KL-divergence between p1 and p2
     """
-    assert (len(p1) == len(p2))
+    assert len(p1) == len(p2)
     return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))


-def kl_divergence_norm_softmax(observations: List[Tuple[State, Any]], agent, actions: Discrete):
+def kl_divergence_norm_softmax(
+    observations: list[tuple[State, Any]], agent, actions: Discrete
+):
+    """
+    Calculates the Kullback-Leibler (KL) divergence between two probability distributions.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent: The agent object.
+        actions: The discrete actions.
+
+    Returns:
+        float: The mean KL divergence between the two distributions.
+    """
     distances = []
     p_traj = traj_to_policy(observations=observations, actions=actions)

     for (observation, agent_pos), action in observations:
-        state = observation['image']
+        state = observation["image"]
         state_pickled = dill.dumps(state)

         qp1 = p_traj[state_pickled]
-        qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
-            observation=(observation, agent_pos))
+        qp2_flatten_distribution_list: list[float] = agent.get_actions_probabilities(
+            observation=(observation, agent_pos)
+        )
         distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
     return np.mean(distances)

@@ -53,22 +69,58 @@ def amplify(values, alpha=1.0):
     Returns:
         np.array: amplified softmax probabilities
     """
-    values = values[:3]**alpha # currently only choose to turn or move forward
+    values = values[:3] ** alpha  # currently only choose to turn or move forward
     return values / np.sum(values)

+
 def stochastic_amplified_selection(actions_probs, alpha=8.0):
+    """
+    Selects an action based on the given action probabilities, with amplification using the specified alpha value.
+
+    Parameters:
+        actions_probs (list): A list of action probabilities.
+        alpha (float): Amplification factor (default: 8.0).
+
+    Returns:
+        int: The selected action.
+
+    """
     action_probs_amplified = amplify(actions_probs, alpha)
     choice = np.random.choice(len(action_probs_amplified), p=action_probs_amplified)
     if choice == 3:
         choice = 6
     return choice

+
+import numpy as np
+
+
 def stochastic_selection(actions_probs):
+    """
+    Selects an action based on the given probabilities using a stochastic selection method.
+
+    Parameters:
+        actions_probs (list): A list of probabilities for each action.
+
+    Returns:
+        int: The index of the selected action.
+    """
     return np.random.choice(len(actions_probs), p=actions_probs)

+
 def greedy_selection(actions_probs):
+    """
+    Selects the action with the highest probability.
+
+    Args:
+        actions_probs (numpy.ndarray): Array of action probabilities.
+
+    Returns:
+        int: Index of the selected action.
+    """
     return np.argmax(actions_probs)

+
 def measure_average_sequence_distance(seq1, seq2):
     """Measures the sequence similarity between two sequences of observations and actions.

@@ -82,45 +134,89 @@ def measure_average_sequence_distance(seq1, seq2):

     # Ensure both sequences have the same length
     min_seq_len = np.min([len(seq1), len(seq2)])
-    assert np.max([len(seq1), len(seq2)]) <= 30*min_seq_len, "We can't really measure similarity in case the sequences are really not the same... maybe just return a default NOT_SIMILAR here."
+    assert (
+        np.max([len(seq1), len(seq2)]) <= 30 * min_seq_len
+    ), "We can't really measure similarity in case the sequences are really not the same... maybe just return a default NOT_SIMILAR here."

     # Calculate the Euclidean distance between corresponding elements in the sequences
     distances = []
     for i in range(0, min_seq_len):
-        distances.append(np.sum(np.abs(np.array(seq1[i])-np.array(seq2[i]))))
+        distances.append(np.sum(np.abs(np.array(seq1[i]) - np.array(seq2[i]))))

     # Calculate the average distance over all elements
     return np.mean(np.array(distances))


-def traj_to_policy(observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.) -> Dict[
-    str, List[float]]:
-    # converts a trajectory from a planner to a policy
-    # where the taken action has 99.99999% probability
+def traj_to_policy(
+    observations: list[tuple[State, Any]], actions: Discrete, epsilon: float = 0.0
+) -> dict[str, list[float]]:
+    """
+    Converts a trajectory from a planner to a policy.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of tuples containing the observation and the corresponding action.
+        actions (Discrete): Discrete action space.
+        epsilon (float, optional): Exploration parameter. Defaults to 0.0.
+
+    Returns:
+        dict[str, list[float]]: Dictionary mapping serialized states to action probabilities.
+    """
     trajectory_as_policy = {}
-    for (observation, agent_pos), action in observations:
-        # in the discrete world the action is the index
+    for (observation, _agent_pos), action in observations:
         action_index = action

         actions_len = actions.n
         qs = [1e-6 + epsilon / actions_len for _ in range(actions_len)]
-        qs[action_index] = 1. - 1e-6 * (actions_len - 1) - epsilon
+        qs[action_index] = 1.0 - 1e-6 * (actions_len - 1) - epsilon

-        state = observation['image']
+        state = observation["image"]
         state_pickled = dill.dumps(state)
         trajectory_as_policy[state_pickled] = qs
     return trajectory_as_policy

-def pass_observation_patcher(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
-    for observation in observations:
-        yield observation
+
+from collections.abc import Generator
+from typing import Any
+
+
+def pass_observation_patcher(
+    observations: list[Any], agent: RLAgent
+) -> Generator[None, None, None]:
+    """
+    Generator function that yields observations.
+
+    Args:
+        observations (list): List of observations.
+        agent (RLAgent): RL agent object.
+
+    Yields:
+        None: Yields each observation from the list.
+
+    """
+    yield from observations
+

 def mean_wasserstein_distance(
-    observations: List[Tuple[State, Any]],
-    agent: DeepRLAgent,
-    actions: gymnasium.spaces.Box,
-    observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
+    observations: list[tuple[State, Any]],
+    agent: DeepRLAgent,
+    actions: gymnasium.spaces.Box,
+    observation_patcher: Callable[
+        [list[Any], RLAgent], Generator[None, None, None]
+    ] = pass_observation_patcher,
 ):
+    """
+    Calculates the mean Wasserstein distance between observed actions and actor means.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent.
+        actions (gymnasium.spaces.Box): The action space.
+        observation_patcher (Callable[[list[Any], RLAgent], Generator[None, None, None]], optional):
+            A function that patches the observations. Defaults to pass_observation_patcher.
+
+    Returns:
+        float: The mean Wasserstein distance between observed actions and actor means.
+    """
     distances = []

     for observation, observed_action in observation_patcher(observations, agent):
@@ -141,49 +237,98 @@ def mean_wasserstein_distance(
             wasserstein_distances.append(
                 wasserstein_distance([observation_action], [actor_mean])
             )
-        distances.append(mean(wasserstein_distances))
-    return mean(distances)
+        distances.append(np.mean(wasserstein_distances))
+    return np.mean(distances)


-def mean_action_distance_continuous(observations: List[Tuple[State, Any]], agent: DeepRLAgent, actions: gymnasium.spaces.Box):
+def mean_action_distance_continuous(
+    observations: list[tuple[State, Any]],
+    agent: DeepRLAgent,
+    actions: gymnasium.spaces.Box,
+):
+    """
+    Calculates the mean distance between the predicted actions and the actual actions for a continuous action space.
+
+    Args:
+        observations (list[tuple[State, Any]]): A list of tuples containing the observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent used to predict actions.
+        actions (gymnasium.spaces.Box): The action space.
+
+    Returns:
+        float: The mean distance between the predicted actions and the actual actions.
+    """
     distances = []
     for observation, action in observations:
         action2, _ = agent.model.predict(
             observation,
             state=None,
             deterministic=True,
-            episode_start=np.ones((1,), dtype=bool)
+            episode_start=np.ones((1,), dtype=bool),
         )
         action_arr, action2_arr = action[0], action2[0]
         print(f"actor means:{action2}")
-        assert len(action_arr) == len(action2_arr), f"Actions should be on the same length:{action},{action2}"
+        assert len(action_arr) == len(
+            action2_arr
+        ), f"Actions should be on the same length:{action},{action2}"

         total_diff = 0
-        # total_diff = []
         for action1, action2 in zip(action_arr, action2_arr):
             total_diff += math.fabs(action1 - action2)
-        # distances.append(statistics.mean(total_diff))
         distances.append(total_diff)
-    # print(f"distances:{distances}")
-    return mean(distances)
+    return np.mean(distances)
+
+
+from collections.abc import Generator
+from typing import Any


-def set_agent_goal_observation(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
+def set_agent_goal_observation(
+    observations: list[Any], agent: RLAgent
+) -> Generator[None, None, None]:
+    """
+    Sets the desired goal in each observation to the agent's goal.
+
+    Args:
+        observations (list): List of observations.
+        agent (RLAgent): The RL agent.
+
+    Yields:
+        tuple: A tuple containing the modified observation and the corresponding action.
+    """
     copy_observation = observations.copy()
     for observation, action in copy_observation:
-        observation['desired_goal'] = agent.goal
+        observation["desired_goal"] = agent.goal
         yield observation, action


 def z_score(x, mean_action: float, std_dev: float):
     return (x - mean_action) / std_dev

+
 def mean_p_value(
-    observations: List[Tuple[State, Any]],
-    agent: DeepRLAgent,
-    actions: gymnasium.spaces.Box,
-    observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
+    observations: list[tuple[State, Any]],
+    agent: DeepRLAgent,
+    actions: gymnasium.spaces.Box,
+    observation_patcher: Callable[
+        [list[Any], RLAgent], Generator[None, None, None]
+    ] = pass_observation_patcher,
 ):
+    """
+    Calculate the mean p-value for a given set of observations.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent.
+        actions (gymnasium.spaces.Box): The action space.
+        observation_patcher (Callable[[list[Any], RLAgent], Generator[None, None, None]], optional):
+            A function that patches the observations. Defaults to pass_observation_patcher.
+
+    Returns:
+        float: The mean p-value.
+
+    Raises:
+        Exception: If the lengths of observed actions, actor mean, and std-dev are not equal.
+    """
     distances = []
     for observation, observed_action in observation_patcher(observations, agent):
         # execute prediction X times and add to list (observed_action * X) |X| Len
@@ -194,30 +339,62 @@ def mean_p_value(
         observed_actions = observed_action[0]
         log_std_dev = log_std_dev[0]

-        if len(actor_means) != len(observed_actions) or len(actor_means) != len(log_std_dev) or len(observed_actions) != len(log_std_dev):
+        if (
+            len(actor_means) != len(observed_actions)
+            or len(actor_means) != len(log_std_dev)
+            or len(observed_actions) != len(log_std_dev)
+        ):
             raise Exception(
                 f"Length of observed actions, actor mean and std-dev should be equal! "
                 f"{len(observed_actions)},{len(actor_means)},{len(log_std_dev)}"
             )
         z_scores = []
-        for actor_mean, observation_action, action_log_std_dev in zip(actor_means, observed_actions, log_std_dev):
+        for actor_mean, observation_action, action_log_std_dev in zip(
+            actor_means, observed_actions, log_std_dev
+        ):
             z_scores.append(
-                math.fabs(z_score(observation_action, actor_mean, math.pow(2, math.fabs(action_log_std_dev))))
+                math.fabs(
+                    z_score(
+                        observation_action,
+                        actor_mean,
+                        math.pow(2, math.fabs(action_log_std_dev)),
+                    )
+                )
             )
-        mean_distances = mean(z_scores)
+        mean_distances = np.mean(z_scores)

         distances.append(mean_distances)
-    return mean(distances)
+    return np.mean(distances)
+

-def normalize(values: List[float]) -> List[float]:
+def normalize(values: list[float]) -> list[float]:
+    """
+    Normalize a list of values by dividing each value by the sum of all values.
+
+    Args:
+        values (list[float]): The list of values to be normalized.
+
+    Returns:
+        list[float]: The normalized list of values.
+    """
     values /= sum(values)
     return values

-def max(values: List[float]) -> List[float]:
+
+def maximum(values: list[float]) -> list[float]:
+    """
+    Returns a list with the same length as the input list, where the maximum value is set to 1.0 and all other values are set to 0.0.
+
+    Args:
+        values (list[float]): The input list of values.
+
+    Returns:
+        list[float]: A list with the same length as the input list, where the maximum value is set to 1.0 and all other values are set to 0.0.
+    """
     if not len(values):
         return values
     vals = np.array(values)
     argmax = vals.argmax()
     vals[:] = 0.0
     vals[argmax] = 1.0
-    return vals
+    return vals
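For orientation, here is a minimal usage sketch of the pure-numeric helpers after this refactor (builtin generics instead of typing aliases, max renamed to maximum, statistics.mean replaced by np.mean). The import path gr_libs.metrics.metrics is assumed from the file list above, and the probability values are illustrative only, not taken from the package.

# Sketch only: assumed import path, made-up numbers.
import numpy as np
from gr_libs.metrics.metrics import kl_divergence, maximum, normalize

p = normalize(np.array([0.2, 0.5, 0.3]))  # already sums to 1.0, stays unchanged
q = normalize(np.array([1.0, 1.0, 2.0]))  # -> [0.25, 0.25, 0.5]

# kl_divergence implements sum(p[i] * log2(p[i] / q[i])), as shown in the diff above.
print(kl_divergence(list(p), list(q)))

# maximum() one-hots the argmax: [0.2, 0.5, 0.3] -> [0.0, 1.0, 0.0]
print(maximum([0.2, 0.5, 0.3]))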
gr_libs/ml/__init__.py CHANGED
@@ -1,6 +1,3 @@
-from ..ml.utils import device, seed, synthesize
-# from ml.neural import PPOAlgo
-from ..ml.tabular import TabularQLearner
 # from ml.neural import ACModel, RecurrentACModel
-from ..ml.neural import DictList
-from ..ml.agent import Agent
+
+# from ml.neural import PPOAlgo
gr_libs/ml/agent.py CHANGED
@@ -2,6 +2,7 @@ import torch

 from gr_libs.ml import utils
 from gr_libs.ml.utils.other import device
+
 # from ml.neural import ACModel


@@ -12,15 +13,27 @@ class Agent:
     - to choose an action given an observation,
     - to analyze the feedback (i.e. reward and done state) of its action."""

-    def __init__(self, obs_space, action_space, model_dir,
-                 argmax=False, num_envs=1, use_memory=True, use_text=False):
+    def __init__(
+        self,
+        obs_space,
+        action_space,
+        model_dir,
+        argmax=False,
+        num_envs=1,
+        use_memory=True,
+        use_text=False,
+    ):
         obs_space, self.preprocess_obss = utils.get_obss_preprocessor(obs_space)
-        self.acmodel = ACModel(obs_space, action_space, use_memory=use_memory, use_text=use_text)
+        self.acmodel = ACModel(
+            obs_space, action_space, use_memory=use_memory, use_text=use_text
+        )
         self.argmax = argmax
         self.num_envs = num_envs

         if self.acmodel.recurrent:
-            self.memories = torch.zeros(self.num_envs, self.acmodel.memory_size, device=device)
+            self.memories = torch.zeros(
+                self.num_envs, self.acmodel.memory_size, device=device
+            )

         self.acmodel.load_state_dict(utils.get_model_state(model_dir))
         self.acmodel.to(device)
@@ -49,8 +62,10 @@ class Agent:

     def analyze_feedbacks(self, rewards, dones):
         if self.acmodel.recurrent:
-            masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(1)
+            masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(
+                1
+            )
             self.memories *= masks

     def analyze_feedback(self, reward, done):
-        return self.analyze_feedbacks([reward], [done])
+        return self.analyze_feedbacks([reward], [done])
gr_libs/ml/base/__init__.py CHANGED
@@ -1 +1,3 @@
-from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
+""" base ML classes for other modules to extend. """
+
+from gr_libs.ml.base.rl_agent import ContextualAgent, RLAgent, State
gr_libs/ml/base/rl_agent.py CHANGED
@@ -1,26 +1,61 @@
-from typing import Any
 from abc import ABC, abstractmethod
-import numpy as np
+from typing import Any

 State = Any

+
 class ContextualAgent:
+    """
+    A class representing a contextual agent for reinforcement learning, including gym properties.
+
+    Args:
+        problem_name (str): The name of the problem the agent is designed to solve.
+        problem_goal (str): The goal of the problem the agent is designed to achieve.
+        agent: The underlying agent implementation.
+
+    Attributes:
+        problem_name (str): The name of the problem the agent is designed to solve.
+        problem_goal (str): The goal of the problem the agent is designed to achieve.
+        agent: The underlying agent implementation.
+    """
+
     def __init__(self, problem_name, problem_goal, agent):
+        """
+        Initializes a reinforcement learning agent.
+
+        Args:
+            problem_name (str): The name of the problem.
+            problem_goal (str): The goal of the problem.
+            agent: The agent object.
+        """
         self.problem_name = problem_name
         self.problem_goal = problem_goal
         self.agent = agent

+
 class RLAgent(ABC):
     def __init__(
-            self,
-            episodes: int,
-            decaying_eps: bool,
-            epsilon: float,
-            learning_rate: float,
-            gamma: float,
-            problem_name: str,
-            domain_name: str
+        self,
+        episodes: int,
+        decaying_eps: bool,
+        epsilon: float,
+        learning_rate: float,
+        gamma: float,
+        problem_name: str,
+        domain_name: str,
     ):
+        """
+        Initializes a reinforcement learning agent.
+
+        Args:
+            episodes (int): The number of episodes to train the agent.
+            decaying_eps (bool): Whether to use decaying epsilon-greedy exploration.
+            epsilon (float): The exploration rate.
+            learning_rate (float): The learning rate.
+            gamma (float): The discount factor.
+            problem_name (str): The name of the problem.
+            domain_name (str): The name of the domain.
+        """
         self.episodes = episodes
         self.decaying_eps = decaying_eps
         self.epsilon = epsilon
@@ -33,22 +68,55 @@ class RLAgent(ABC):

     @abstractmethod
     def learn(self):
-        pass
+        """
+        Abstract method for the agent to learn from the environment.
+        """

     def class_name(self):
+        """
+        Returns the name of the agent's class.
+
+        Returns:
+            str: The name of the agent's class.
+        """
         return self.__class__.__name__

     def get_actions_probabilities(self, observation):
+        """
+        Get the probabilities of available actions given an observation.
+
+        Args:
+            observation: The observation from the environment.
+
+        Raises:
+            Exception: This function is unimplemented.
+
+        Returns:
+            Any: The probabilities of available actions.
+        """
         raise Exception("function get_actions_probabilities is unimplemented")

     def get_number_of_unique_states(self):
+        """
+        Get the number of unique states encountered by the agent.
+
+        Returns:
+            int: The number of unique states encountered.
+        """
         return len(self.states_counter)

     def update_states_counter(self, observation_str: str):
+        """
+        Update the counter for the number of times each observation state is encountered.
+
+        Args:
+            observation_str (str): The string representation of the observation state.
+        """
         if observation_str in self.states_counter:
-            self.states_counter[observation_str] = self.states_counter[observation_str] + 1
+            self.states_counter[observation_str] = (
+                self.states_counter[observation_str] + 1
+            )
         else:
             self.states_counter[observation_str] = 1
         if len(self.states_counter) % 10000 == 0:
             print(f"probably error to many {len(self.states_counter)}")
-
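To illustrate the reworked RLAgent constructor and the bookkeeping helpers documented above, a hypothetical subclass sketch follows. Only the signature and methods shown in this diff are used; the subclass name, the placeholder problem and domain strings, and the assumption that the base constructor initializes states_counter in the lines elided from the hunk are not taken from the package.

# Hypothetical sketch; names and values below are placeholders.
from gr_libs.ml.base import RLAgent


class CountingAgent(RLAgent):
    """Toy subclass that only exercises the bookkeeping API of RLAgent."""

    def learn(self):
        # A real agent would interact with its environment here; this toy
        # version just records a few repeated observation strings.
        for step in range(self.episodes):
            self.update_states_counter(observation_str=f"state-{step % 3}")


agent = CountingAgent(
    episodes=10,
    decaying_eps=True,
    epsilon=0.1,
    learning_rate=0.001,
    gamma=0.99,
    problem_name="example-problem",
    domain_name="example-domain",
)
agent.learn()
print(agent.get_number_of_unique_states())  # expected: 3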
gr_libs/ml/consts.py CHANGED
@@ -19,4 +19,4 @@ OPTIM_EPS = 1e-8
 OPTIM_ALPHA = 0.99
 CLIP_EPS = 0.2
 RECURRENCE = 1
-TEXT = False
+TEXT = False
gr_libs/ml/neural/__init__.py CHANGED
@@ -1,3 +1 @@
-# from ml.neural.model import AbstractACModel, RecurrentACModel, ACModel
-# from ml.neural.algorithms import BaseAlgo, A2CAlgo, PPOAlgo
-from gr_libs.ml.neural.utils import DictList
+""" Algorithms that involve using neural networks. """