gr-libs 0.1.8__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. gr_libs/__init__.py +3 -1
  2. gr_libs/_version.py +2 -2
  3. gr_libs/all_experiments.py +260 -0
  4. gr_libs/environment/__init__.py +14 -1
  5. gr_libs/environment/_utils/__init__.py +0 -0
  6. gr_libs/environment/{utils → _utils}/utils.py +1 -1
  7. gr_libs/environment/environment.py +278 -23
  8. gr_libs/evaluation/__init__.py +1 -0
  9. gr_libs/evaluation/generate_experiments_results.py +100 -0
  10. gr_libs/metrics/__init__.py +2 -0
  11. gr_libs/metrics/metrics.py +166 -31
  12. gr_libs/ml/__init__.py +1 -6
  13. gr_libs/ml/base/__init__.py +3 -1
  14. gr_libs/ml/base/rl_agent.py +68 -3
  15. gr_libs/ml/neural/__init__.py +1 -3
  16. gr_libs/ml/neural/deep_rl_learner.py +241 -84
  17. gr_libs/ml/neural/utils/__init__.py +1 -2
  18. gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +1 -1
  19. gr_libs/ml/planner/mcts/mcts_model.py +71 -34
  20. gr_libs/ml/sequential/__init__.py +0 -1
  21. gr_libs/ml/sequential/{lstm_model.py → _lstm_model.py} +11 -14
  22. gr_libs/ml/tabular/__init__.py +1 -3
  23. gr_libs/ml/tabular/tabular_q_learner.py +27 -9
  24. gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
  25. gr_libs/ml/utils/__init__.py +2 -9
  26. gr_libs/ml/utils/format.py +13 -90
  27. gr_libs/ml/utils/math.py +3 -2
  28. gr_libs/ml/utils/other.py +2 -2
  29. gr_libs/ml/utils/storage.py +41 -94
  30. gr_libs/odgr_executor.py +263 -0
  31. gr_libs/problems/consts.py +570 -292
  32. gr_libs/recognizer/{utils → _utils}/format.py +2 -2
  33. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +127 -36
  34. gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py} +11 -11
  35. gr_libs/recognizer/graml/graml_recognizer.py +186 -35
  36. gr_libs/recognizer/recognizer.py +59 -10
  37. gr_libs/tutorials/draco_panda_tutorial.py +58 -0
  38. gr_libs/tutorials/draco_parking_tutorial.py +56 -0
  39. {tutorials → gr_libs/tutorials}/gcdraco_panda_tutorial.py +11 -11
  40. {tutorials → gr_libs/tutorials}/gcdraco_parking_tutorial.py +6 -8
  41. {tutorials → gr_libs/tutorials}/graml_minigrid_tutorial.py +18 -14
  42. {tutorials → gr_libs/tutorials}/graml_panda_tutorial.py +11 -12
  43. {tutorials → gr_libs/tutorials}/graml_parking_tutorial.py +8 -10
  44. {tutorials → gr_libs/tutorials}/graml_point_maze_tutorial.py +17 -3
  45. {tutorials → gr_libs/tutorials}/graql_minigrid_tutorial.py +2 -2
  46. {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/METADATA +95 -29
  47. gr_libs-0.2.5.dist-info/RECORD +72 -0
  48. {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/WHEEL +1 -1
  49. gr_libs-0.2.5.dist-info/top_level.txt +2 -0
  50. tests/test_draco.py +14 -0
  51. tests/test_gcdraco.py +2 -2
  52. tests/test_graml.py +4 -4
  53. tests/test_graql.py +1 -1
  54. tests/test_odgr_executor_expertbasedgraml.py +14 -0
  55. tests/test_odgr_executor_gcdraco.py +14 -0
  56. tests/test_odgr_executor_gcgraml.py +14 -0
  57. tests/test_odgr_executor_graql.py +14 -0
  58. evaluation/analyze_results_cross_alg_cross_domain.py +0 -267
  59. evaluation/create_minigrid_map_image.py +0 -38
  60. evaluation/file_system.py +0 -53
  61. evaluation/generate_experiments_results.py +0 -141
  62. evaluation/generate_experiments_results_new_ver1.py +0 -238
  63. evaluation/generate_experiments_results_new_ver2.py +0 -331
  64. evaluation/generate_task_specific_statistics_plots.py +0 -500
  65. evaluation/get_plans_images.py +0 -62
  66. evaluation/increasing_and_decreasing_.py +0 -104
  67. gr_libs/ml/neural/utils/penv.py +0 -60
  68. gr_libs-0.1.8.dist-info/RECORD +0 -70
  69. gr_libs-0.1.8.dist-info/top_level.txt +0 -4
  70. /gr_libs/{environment/utils/__init__.py → _evaluation/_generate_experiments_results.py} +0 -0
  71. /gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +0 -0
  72. /gr_libs/ml/planner/mcts/{utils → _utils}/node.py +0 -0
  73. /gr_libs/recognizer/{utils → _utils}/__init__.py +0 -0
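Note on the new layout: the file list above reflects a repackaging of the distribution. The top-level tutorials/ and evaluation/ directories move under the gr_libs package, several utils packages are renamed to private _utils modules, and two new entry points appear (gr_libs/odgr_executor.py and gr_libs/all_experiments.py). A minimal, illustrative sketch of how downstream imports might change, assuming the moved files remain importable as ordinary submodules (the exact public API is not shown in this diff):

# Hypothetical migration for code written against the 0.1.8 layout.
# 0.1.8: tutorials and evaluation lived at the repository top level:
#   from tutorials import graml_minigrid_tutorial
#   from evaluation import generate_experiments_results
# 0.2.5: everything is namespaced under gr_libs:
from gr_libs.tutorials import graml_minigrid_tutorial
from gr_libs.evaluation import generate_experiments_results
import gr_libs.odgr_executor        # new single-run executor module
import gr_libs.all_experiments      # new batch-experiment runner

# Packages renamed with a leading underscore (environment._utils, recognizer._utils,
# ml.planner.mcts._utils) are now internal and should not be imported by user code.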
gr_libs/ml/neural/deep_rl_learner.py

@@ -1,24 +1,26 @@
- from collections import OrderedDict
  import gc
+ from collections import OrderedDict
  from types import MethodType
- from typing import List, Tuple
- import numpy as np
+
  import cv2
+ import numpy as np

- from gr_libs.environment.environment import EnvProperty
+ from gr_libs.environment.environment import EnvProperty, suppress_output

  if __name__ != "__main__":
  from gr_libs.ml.utils.storage import get_agent_model_dir
  from gr_libs.ml.utils.format import random_subset_with_order
- from stable_baselines3 import SAC, PPO, TD3
- from stable_baselines3.common.base_class import BaseAlgorithm
- from gr_libs.ml.utils import device
- import gymnasium as gym
+
+ import os

  # built-in python modules
  import random
- import os
- import sys
+
+ import gymnasium as gym
+ from stable_baselines3 import PPO, SAC, TD3
+ from stable_baselines3.common.base_class import BaseAlgorithm
+
+ from gr_libs.ml.utils import device

  # TODO do we need this?
  NETWORK_SETUP = {
@@ -42,7 +44,6 @@ NETWORK_SETUP = {
  ("normalize_kwargs", {"norm_obs": False, "norm_reward": False}),
  ]
  ),
- # "tqc": OrderedDict([('batch_size', 256), ('buffer_size', 1000000), ('ent_coef', 'auto'), ('env_wrapper', ['sb3_contrib.common.wrappers.TimeFeatureWrapper']), ('gamma', 0.95), ('learning_rate', 0.001), ('learning_starts', 1000), ('n_timesteps', 25000.0), ('normalize', False), ('policy', 'MultiInputPolicy'), ('policy_kwargs', 'dict(net_arch=[64, 64])'), ('replay_buffer_class', 'HerReplayBuffer'), ('replay_buffer_kwargs', "dict( goal_selection_strategy='future', n_sampled_goal=4 )"), ('normalize_kwargs',{'norm_obs':False,'norm_reward':False})]),
  PPO: OrderedDict(
  [
  ("batch_size", 256),
@@ -68,6 +69,22 @@ NETWORK_SETUP = {


  class DeepRLAgent:
+ """
+ Deep Reinforcement Learning Agent, wrapping a SB3 agent and adding functionality,
+ needed for GR framework executions such as observation generation and video recording.
+ Supports SAC, PPO and TD3 algorithms.
+ Can be loaded from rl_zoo or trained from scratch.
+
+ Args:
+ domain_name (str): The domain name.
+ problem_name (str): The problem name.
+ num_timesteps (float): The number of timesteps for training.
+ env_prop (EnvProperty): The environment property.
+ algorithm (BaseAlgorithm, optional): The algorithm to use. Defaults to SAC.
+ reward_threshold (float, optional): The reward threshold. Defaults to 450.
+ exploration_rate (float, optional): The exploration rate. Defaults to None.
+ """
+
  def __init__(
  self,
  domain_name: str,
@@ -78,7 +95,18 @@ class DeepRLAgent:
  reward_threshold: float = 450,
  exploration_rate=None,
  ):
- # Need to change reward threshold to change according to which task the agent is training on, becuase it changes from task to task.
+ """
+ Initialize the DeepRLLearner object.
+
+ Args:
+ domain_name (str): The name of the domain.
+ problem_name (str): The name of the problem.
+ num_timesteps (float): The number of timesteps.
+ env_prop (EnvProperty): The environment property.
+ algorithm (BaseAlgorithm, optional): The algorithm to use. Defaults to SAC.
+ reward_threshold (float, optional): The reward threshold. Defaults to 450.
+ exploration_rate (float, optional): The exploration rate. Defaults to None.
+ """
  env_kwargs = {"id": problem_name, "render_mode": "rgb_array"}
  assert algorithm in [SAC, PPO, TD3]

@@ -110,7 +138,8 @@ class DeepRLAgent:
  "seed": 0,
  "buffer_size": 1,
  }
- # second support: models saved with SB3's model.save, which is saved as a formatted .pth file.
+ # second support: models saved with SB3's model.save, which is saved as a
+ # formatted .pth file.
  else:
  self.model_kwargs = {}
  self._model_file_path = os.path.join(
@@ -122,9 +151,17 @@ class DeepRLAgent:
  self.num_timesteps = num_timesteps

  def save_model(self):
+ """Save the model to a file."""
  self._model.save(self._model_file_path)

  def try_recording_video(self, video_path, desired=None):
+ """
+ Try recording a video of the agent's performance.
+
+ Args:
+ video_path (str): The path to save the video.
+ desired (optional): The desired goal. Defaults to None.
+ """
  num_tries = 0
  while True:
  if num_tries >= 10:
@@ -132,21 +169,22 @@ class DeepRLAgent:
  try:
  self.record_video(video_path, desired)
  break
- except Exception as e:
+ except Exception:
  num_tries += 1
  # print(f"sequence to {self.problem_name} is:\n\t{steps}\ngenerating image at {img_path}.")
  print(f"generated sequence video at {video_path}.")

  def record_video(self, video_path, desired=None):
- """Record a video of the agent's performance."""
+ """
+ Record a video of the agent's performance.
+
+ Args:
+ video_path (str): The path to save the video.
+ desired (optional): The desired goal. Defaults to None.
+ """
  fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
  fps = 30.0
- # if is_gc:
- # assert goal_idx != None
- # self.reset_with_goal_idx(goal_idx)
- # else:
- # assert goal_idx == None
- self.env.reset()
+ self.safe_env_reset()
  frame_size = (
  self.env.render(mode="rgb_array").shape[1],
  self.env.render(mode="rgb_array").shape[0],
@@ -155,7 +193,7 @@ class DeepRLAgent:
  video_writer = cv2.VideoWriter(video_path, fourcc, fps, frame_size)
  general_done, success_done = False, False
  gc.collect()
- obs = self.env.reset()
+ obs = self.safe_env_reset()
  self.env_prop.change_goal_to_specific_desired(obs, desired)
  counter = 0
  while not (general_done or success_done):
@@ -166,17 +204,11 @@ class DeepRLAgent:
  general_done = general_done[0]
  self.env_prop.change_goal_to_specific_desired(obs, desired)
  if "success" in info[0].keys():
- success_done = info[0][
- "success"
- ] # make sure the agent actually reached the goal within the max time
+ success_done = info[0]["success"]
  elif "is_success" in info[0].keys():
- success_done = info[0][
- "is_success"
- ] # make sure the agent actually reached the goal within the max time
+ success_done = info[0]["is_success"]
  elif "step_task_completions" in info[0].keys():
- success_done = (
- len(info[0]["step_task_completions"]) == 1
- ) # bug of dummyVecEnv, it removes the episode_task_completions from the info dict.
+ success_done = len(info[0]["step_task_completions"]) == 1
  else:
  raise NotImplementedError(
  "no other option for any of the environments."
@@ -186,34 +218,32 @@ class DeepRLAgent:
  obs, desired, success_done
  )
  video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
- if general_done == False != success_done == True:
+ if general_done == False and success_done == True:
  assert (
  desired is not None
- ), f"general_done is false but success_done is true, and desired is None. This should never happen, since the \
- environment will say 'done' is false (general_done) while the observation will be close to the goal (success_done) \
- only in case we incorporated a 'desired' when generating the observation."
- elif general_done == True != success_done == False:
+ ), f"general_done is false but success_done is true, and desired is None. \
+ This should never happen, since the environment will say 'done' is false \
+ (general_done) while the observation will be close to the goal (success_done) \
+ only in case we incorporated a 'desired' when generating the observation."
+ elif general_done == True and success_done == False:
  raise Exception("general_done is true but success_done is false")
  self.env.close()
  video_writer.release()

  def load_model(self):
+ """Load the model from a file."""
  self._model = self.algorithm.load(
  self._model_file_path, env=self.env, device=device, **self.model_kwargs
  )

  def learn(self):
+ """Train the agent."""
  if os.path.exists(self._model_file_path):
  print(f"Loading pre-existing model in {self._model_file_path}")
  self.load_model()
  else:
- # Stop training when the model reaches the reward threshold
- # callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=self.reward_threshold, verbose=1)
- # eval_callback = EvalCallback(self.env, best_model_save_path="./logs/",
- # log_path="./logs/", eval_freq=500, callback_on_new_best=callback_on_best, verbose=1, render=True)
- # self._model.learn(total_timesteps=self.num_timesteps, progress_bar=True, callback=eval_callback)
  print(f"No existing model in {self._model_file_path}, starting learning")
- if self.exploration_rate != None:
+ if self.exploration_rate is not None:
  self._model = self.algorithm(
  "MultiInputPolicy",
  self.env,
@@ -228,15 +258,30 @@ class DeepRLAgent:
  self.save_model()

  def safe_env_reset(self):
+ """
+ Reset the environment safely, suppressing output.
+
+ Returns:
+ The initial observation.
+ """
  try:
- obs = self.env.reset()
- except Exception as e:
+ obs = suppress_env_reset(self.env)
+ except Exception:
  kwargs = {"id": self.problem_name, "render_mode": "rgb_array"}
  self.env = self.env_prop.create_vec_env(kwargs)
- obs = self.env.reset()
+ obs = suppress_env_reset(self.env)
  return obs

  def get_mean_and_std_dev(self, observation):
+ """
+ Get the mean and standard deviation of the action distribution.
+
+ Args:
+ observation: The observation.
+
+ Returns:
+ The mean and standard deviation of the action distribution.
+ """
  if self.algorithm == SAC:
  tensor_observation, _ = self._model.actor.obs_to_tensor(observation)

@@ -266,9 +311,20 @@ class DeepRLAgent:
  assert False
  return actor_means, log_std_dev

- # fits agents that generated observations in the form of: list of tuples, each tuple a single step\frame with size 2, comprised of obs and action.
- # the function squashes the 2d array of obs and action in a 1d array, concatenating their values together for training.
  def simplify_observation(self, observation):
+ """
+ Simplifies the given observation by concatenating the last dimension of each observation and action.
+ fits agents that generated observations in the form of: list of tuples, each tuple a single
+ step\frame with size 2, comprised of obs and action.
+ the function squashes the 2d array of obs and action in a 1d array, concatenating their
+ values together for training.
+
+ Args:
+ observation (list): List of tuples containing observation and action.
+
+ Returns:
+ list: List of simplified observations.
+ """
  return [
  np.concatenate(
  (
@@ -280,6 +336,17 @@ class DeepRLAgent:
  ]

  def add_random_optimalism(self, observations, action, constant_initial_action):
+ """
+ Adds random optimalism to the given action based on the length of observations.
+
+ Parameters:
+ observations (list): List of observations.
+ action (ndarray): Action to modify.
+ constant_initial_action (float): Initial action value.
+
+ Returns:
+ ndarray: Modified action.
+ """
  if len(observations) > 3:
  for i in range(0, len(action[0])):
  action[0][i] += random.uniform(
@@ -287,6 +354,7 @@ class DeepRLAgent:
  )
  else: # just walk in a specific random direction to enable diverse plans
  action = np.array(np.array([constant_initial_action]), None)
+ return action

  def generate_partial_observation(
  self,
@@ -297,6 +365,20 @@ class DeepRLAgent:
  fig_path=None,
  random_optimalism=True,
  ):
+ """
+ Generates a partial observation by selecting a subset of steps from a full observation.
+
+ Args:
+ action_selection_method (str): The method used for selecting actions.
+ percentage (float): The percentage of steps to include in the partial observation.
+ is_consecutive (bool): Whether the selected steps should be consecutive or not.
+ save_fig (bool, optional): Whether to save a figure of the observation. Defaults to False.
+ fig_path (str, optional): The path to save the figure. Defaults to None.
+ random_optimalism (bool, optional): Whether to apply random optimalism during observation generation. Defaults to True.
+
+ Returns:
+ list: A partial observation consisting of a subset of steps from the full observation.
+ """
  steps = self.generate_observation(
  action_selection_method,
  save_fig=save_fig,
@@ -315,25 +397,39 @@ class DeepRLAgent:
  fig_path=None,
  with_dict=False,
  desired=None,
- ) -> List[
- Tuple[np.ndarray, np.ndarray]
- ]: # TODO make sure to add a linter to alert when a method doesn't accept or return the type it should
- if save_fig == False:
+ ) -> list[tuple[np.ndarray, np.ndarray]]:
+ """
+ Generates observations by interacting with the environment.
+
+ Args:
+ action_selection_method (MethodType): The method used for action selection.
+ random_optimalism (bool): Flag indicating whether to add random optimalism to the actions.
+ save_fig (bool, optional): Flag indicating whether to save a figure. Defaults to False.
+ fig_path (str, optional): The path to save the figure. Required if save_fig is True. Defaults to None.
+ with_dict (bool, optional): Flag indicating whether to include the observation as a dictionary. Defaults to False.
+ desired (Any, optional): The desired goal for the observation. Defaults to None.
+
+ Returns:
+ list[tuple[np.ndarray, np.ndarray]]: A list of tuples containing the observation and the corresponding action.
+ """
+ if save_fig is False:
  assert (
- fig_path == None
+ fig_path is None
  ), "You can't specify a vid path when you don't even save the figure."
  else:
  assert (
- fig_path != None
+ fig_path is not None
  ), "You need to specify a vid path when you save the figure."
- # The try-except is a bug fix for the env not being reset properly in panda. If someone wants to check why and provide a robust solution they're welcome.
+ # The try-except is a bug fix for the env not being reset properly in panda.
+ # If someone wants to check why and provide a robust solution they're welcome.
  obs = self.safe_env_reset()
  self.env_prop.change_goal_to_specific_desired(obs, desired)
  observations = []
  is_successful_observation_made = False
  num_of_insuccessful_attempts = 0
  while not is_successful_observation_made:
- is_successful_observation_made = True # start as true, if this isn't the case (crash/death/truncation instead of success)
+ # start as true, if this isn't the case (crash/death/truncation instead of success)
+ is_successful_observation_made = True
  if random_optimalism:
  constant_initial_action = self.env.action_space.sample()
  while True:
@@ -343,9 +439,8 @@ class DeepRLAgent:
  action_selection_method != stochastic_amplified_selection
  )
  action, _states = self._model.predict(obs, deterministic=deterministic)
- if (
- random_optimalism
- ): # get the right direction and then start inserting noise to still get a relatively optimal plan
+ if random_optimalism:
+ # get the right direction and then start inserting noise to still get a relatively optimal plan
  self.add_random_optimalism(obs, action, constant_initial_action)
  if with_dict:
  observations.append((obs, action))
@@ -353,22 +448,31 @@ class DeepRLAgent:
  observations.append((obs["observation"], action))
  obs, reward, done, info = self.env.step(action)
  self.env_prop.change_goal_to_specific_desired(obs, desired)
- general_done = self.env_prop.is_done(done)
+ general_done = bool(self.env_prop.is_done(done))
  success_done = self.env_prop.is_success(info)
- success_done = self.env_prop.change_done_by_specific_desired(
- obs, desired, success_done
+ success_done = bool(
+ self.env_prop.change_done_by_specific_desired(
+ obs, desired, success_done
+ )
  )
- if general_done == True and success_done == False:
- # it could be that the stochasticity inserted into the actions made the agent die/crash. we don't want this observation: it's an insuccessful attempt.
+ if general_done is True and success_done is False:
+ # it could be that the stochasticity inserted into the actions made the agent die/crash.
+ # we don't want this observation: it's an insuccessful attempt.
  num_of_insuccessful_attempts += 1
- # print(f"for agent for problem {self.problem_name}, its done {len(observations)} steps, and got to a situation where general_done != success_done, for the {num_of_insuccessful_attempts} time.")
+ # print(f"for agent for problem {self.problem_name}, its done
+ # {len(observations)} steps, and got to a situation where
+ # general_done != success_done, for the {num_of_insuccessful_attempts} time.")
  if num_of_insuccessful_attempts > 50:
  # print(f"got more then 10 insuccessful attempts!")
  assert (
- general_done == success_done
- ), f"failed on goal: {obs['desired']}" # we want to make sure the episode is done only when the agent has actually succeeded with the task.
+ general_done
+ == success_done
+ # we want to make sure the episode is done only
+ # when the agent has actually succeeded with the task.
+ ), f"failed on goal: {obs['desired']}"
  else:
- # try again by breaking inner loop. everything is set up to be like the beginning of the function.
+ # try again by breaking inner loop.
+ # everything is set up to be like the beginning of the function.
  is_successful_observation_made = False
  obs = self.safe_env_reset()
  self.env_prop.change_goal_to_specific_desired(obs, desired)
@@ -376,20 +480,21 @@ class DeepRLAgent:
  []
  ) # we want to re-accumulate the observations from scratch, have another try
  break
- elif general_done == False and success_done == False:
+ elif general_done is False and success_done is False:
  continue
- elif general_done == True and success_done == True:
+ elif general_done is True and success_done is True:
  if num_of_insuccessful_attempts > 0:
  pass # print(f"after {num_of_insuccessful_attempts}, finally I succeeded!")
  break
- elif general_done == False and success_done == True:
- # The environment will say 'done' is false (general_done) while the observation will be close to the goal (success_done)
- # only in case we incorporated a 'desired' when generating the observation.
+ elif general_done is False and success_done is True:
+ # The environment will say 'done' is false (general_done) while the observation
+ # will be close to the goal (success_done) only in case we incorporated a 'desired'
+ # when generating the observation.
  assert (
  desired is not None
  ), f"general_done is false but success_done is true, and desired is None. This should never happen, since the \
- environment will say 'done' is false (general_done) while the observation will be close to the goal (success_done) \
- only in case we incorporated a 'desired' when generating the observation."
+ environment will say 'done' is false (general_done) while the observation will be close to the goal (success_done) \
+ only in case we incorporated a 'desired' when generating the observation."
  break

  if save_fig:
@@ -400,6 +505,23 @@ class DeepRLAgent:


  class GCDeepRLAgent(DeepRLAgent):
+ """
+ A class representing a Goal Conditioned Deep Reinforcement Learning Agent.
+
+ This agent extends the functionality of the base DeepRLAgent class by providing methods for generating partial observations and observations with goal-directed goals or problems.
+
+ Args:
+ DeepRLAgent (class): The base class for DeepRLAgent.
+
+ Attributes:
+ env (object): The environment in which the agent operates.
+ env_prop (object): The environment properties.
+
+ Methods:
+ generate_partial_observation: Generates a partial observation based on a given percentage of steps.
+ generate_observation: Generates an observation with optional goal-directed goals or problems.
+ """
+
  def generate_partial_observation(
  self,
  action_selection_method,
@@ -411,6 +533,22 @@ class GCDeepRLAgent(DeepRLAgent):
  fig_path=None,
  random_optimalism=True,
  ):
+ """
+ Generates a partial observation based on a given percentage of steps.
+
+ Args:
+ action_selection_method (MethodType): The method for selecting actions.
+ percentage (float): The percentage of steps to include in the partial observation.
+ is_consecutive (bool): Whether the steps should be consecutive or randomly selected.
+ goal_directed_problem (str, optional): The goal-directed problem. Defaults to None.
+ goal_directed_goal (object, optional): The goal-directed goal. Defaults to None.
+ save_fig (bool, optional): Whether to save a figure. Defaults to False.
+ fig_path (str, optional): The path to save the figure. Defaults to None.
+ random_optimalism (bool, optional): Whether to use random optimalism. Defaults to True.
+
+ Returns:
+ list: A random subset of steps from the full observation.
+ """
  steps = self.generate_observation(
  action_selection_method,
  save_fig=save_fig,
@@ -423,8 +561,6 @@ class GCDeepRLAgent(DeepRLAgent):
  steps, (int)(percentage * len(steps)), is_consecutive
  )

- # TODO move the goal_directed_goal and/or goal_directed_problem mechanism to be a property of the env_property, so deep_rl_learner doesn't depend on it and holds this logic so heavily.
- # Generate observation with goal_directed_goal or goal_directed_problem is only possible for a GC agent, otherwise - the agent can't act optimally to that new goal.
  def generate_observation(
  self,
  action_selection_method: MethodType,
@@ -435,16 +571,31 @@ class GCDeepRLAgent(DeepRLAgent):
  fig_path=None,
  with_dict=False,
  ):
+ """
+ Generates an observation with optional goal-directed goals or problems.
+
+ Args:
+ action_selection_method (MethodType): The method for selecting actions.
+ random_optimalism (bool): Whether to use random optimalism.
+ goal_directed_problem (str, optional): The goal-directed problem. Defaults to None.
+ goal_directed_goal (object, optional): The goal-directed goal. Defaults to None.
+ save_fig (bool, optional): Whether to save a figure. Defaults to False.
+ fig_path (str, optional): The path to save the figure. Defaults to None.
+ with_dict (bool, optional): Whether to include a dictionary in the observation. Defaults to False.
+
+ Returns:
+ list: The generated observation.
+ """
  if save_fig:
  assert (
- fig_path != None
+ fig_path is not None
  ), "You need to specify a vid path when you save the figure."
  else:
- assert fig_path == None
- # goal_directed_problem employs the GC agent in a new env with a static, predefined goal, and has him generate an observation sequence in it.
+ assert fig_path is None
+
  if goal_directed_problem:
  assert (
- goal_directed_goal == None
+ goal_directed_goal is None
  ), "can't give goal directed goal and also goal directed problem for the sake of sequence generation by a general agent"
  kwargs = {"id": goal_directed_problem, "render_mode": "rgb_array"}
  self.env = self.env_prop.create_vec_env(kwargs)
@@ -457,11 +608,9 @@ class GCDeepRLAgent(DeepRLAgent):
  with_dict=with_dict,
  )
  self.env = orig_env
- # goal_directed_goal employs the agent in the same env on which it trained - with goals that change with every episode sampled from the goal space.
- # but we manually change the 'desired' part of the observation to be the goal_directed_goal and edit the id_success and is_done accordingly.
  else:
  assert (
- goal_directed_problem == None
+ goal_directed_problem is None
  ), "can't give goal directed goal and also goal directed problem for the sake of sequence generation by a general agent"
  observations = super().generate_observation(
  action_selection_method=action_selection_method,
@@ -470,5 +619,13 @@ class GCDeepRLAgent(DeepRLAgent):
  fig_path=fig_path,
  with_dict=with_dict,
  desired=goal_directed_goal,
- ) # TODO tutorial on how to use the deepRLAgent for sequence generation and examination and plotting of the sequence
+ )
  return observations
+
+
+ def suppress_env_reset(env):
+ """
+ Utility function to suppress prints during env.reset().
+ """
+ with suppress_output():
+ return env.reset()
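The docstrings added above spell out how the refactored DeepRLAgent is meant to be driven. A hedged usage sketch, with placeholder domain/problem values and the EnvProperty left abstract (neither is pinned down by this diff):

# Illustrative only: argument names come from the docstrings added in 0.2.5;
# the problem id and EnvProperty construction below are placeholders.
from stable_baselines3 import PPO
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent

env_prop = ...  # an EnvProperty instance for the chosen domain (construction not shown here)

agent = DeepRLAgent(
    domain_name="parking",               # placeholder domain name
    problem_name="Parking-S-14-PC--v0",  # placeholder gym problem id
    num_timesteps=4e5,
    env_prop=env_prop,
    algorithm=PPO,                       # SAC (default), PPO and TD3 are supported
)
agent.learn()                          # loads an existing model file if present, otherwise trains and saves
agent.try_recording_video("demo.mp4")  # now retries through safe_env_reset()/suppress_output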
gr_libs/ml/neural/utils/__init__.py

@@ -1,2 +1 @@
- from gr_libs.ml.neural.utils.dictlist import DictList
- from gr_libs.ml.neural.utils.penv import ParallelEnv
+ """ utility functions for GR algorithms that use neural networks """
gr_libs/ml/planner/mcts/_utils/tree.py

@@ -102,5 +102,5 @@ class Tree:
  def show(self):
  lines = ""
  for edge, node in self.iter(identifier=None, depth=0, last_node_flags=[]):
- lines += "{}{}\n".format(edge, node)
+ lines += f"{edge}{node}\n"
  print(lines)
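The tree.py change is purely cosmetic: the f-string renders exactly what the old str.format call produced. A quick equivalence check, with illustrative values standing in for whatever edge and node stringify to in Tree.show():

edge, node = "|-- ", "node_1"  # illustrative values only
assert "{}{}\n".format(edge, node) == f"{edge}{node}\n"  # identical output; the f-string is simply more idiomatic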