gr_libs-0.1.3-py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +277 -0
  2. evaluation/create_minigrid_map_image.py +34 -0
  3. evaluation/file_system.py +42 -0
  4. evaluation/generate_experiments_results.py +92 -0
  5. evaluation/generate_experiments_results_new_ver1.py +254 -0
  6. evaluation/generate_experiments_results_new_ver2.py +331 -0
  7. evaluation/generate_task_specific_statistics_plots.py +272 -0
  8. evaluation/get_plans_images.py +47 -0
  9. evaluation/increasing_and_decreasing_.py +63 -0
  10. gr_libs/__init__.py +2 -0
  11. gr_libs/environment/__init__.py +0 -0
  12. gr_libs/environment/environment.py +227 -0
  13. gr_libs/environment/utils/__init__.py +0 -0
  14. gr_libs/environment/utils/utils.py +17 -0
  15. gr_libs/metrics/__init__.py +0 -0
  16. gr_libs/metrics/metrics.py +224 -0
  17. gr_libs/ml/__init__.py +6 -0
  18. gr_libs/ml/agent.py +56 -0
  19. gr_libs/ml/base/__init__.py +1 -0
  20. gr_libs/ml/base/rl_agent.py +54 -0
  21. gr_libs/ml/consts.py +22 -0
  22. gr_libs/ml/neural/__init__.py +3 -0
  23. gr_libs/ml/neural/deep_rl_learner.py +395 -0
  24. gr_libs/ml/neural/utils/__init__.py +2 -0
  25. gr_libs/ml/neural/utils/dictlist.py +33 -0
  26. gr_libs/ml/neural/utils/penv.py +57 -0
  27. gr_libs/ml/planner/__init__.py +0 -0
  28. gr_libs/ml/planner/mcts/__init__.py +0 -0
  29. gr_libs/ml/planner/mcts/mcts_model.py +330 -0
  30. gr_libs/ml/planner/mcts/utils/__init__.py +2 -0
  31. gr_libs/ml/planner/mcts/utils/node.py +33 -0
  32. gr_libs/ml/planner/mcts/utils/tree.py +102 -0
  33. gr_libs/ml/sequential/__init__.py +1 -0
  34. gr_libs/ml/sequential/lstm_model.py +192 -0
  35. gr_libs/ml/tabular/__init__.py +3 -0
  36. gr_libs/ml/tabular/state.py +21 -0
  37. gr_libs/ml/tabular/tabular_q_learner.py +453 -0
  38. gr_libs/ml/tabular/tabular_rl_agent.py +126 -0
  39. gr_libs/ml/utils/__init__.py +6 -0
  40. gr_libs/ml/utils/env.py +7 -0
  41. gr_libs/ml/utils/format.py +100 -0
  42. gr_libs/ml/utils/math.py +13 -0
  43. gr_libs/ml/utils/other.py +24 -0
  44. gr_libs/ml/utils/storage.py +127 -0
  45. gr_libs/recognizer/__init__.py +0 -0
  46. gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
  47. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +102 -0
  48. gr_libs/recognizer/graml/__init__.py +0 -0
  49. gr_libs/recognizer/graml/gr_dataset.py +134 -0
  50. gr_libs/recognizer/graml/graml_recognizer.py +266 -0
  51. gr_libs/recognizer/recognizer.py +46 -0
  52. gr_libs/recognizer/utils/__init__.py +1 -0
  53. gr_libs/recognizer/utils/format.py +13 -0
  54. gr_libs-0.1.3.dist-info/METADATA +197 -0
  55. gr_libs-0.1.3.dist-info/RECORD +62 -0
  56. gr_libs-0.1.3.dist-info/WHEEL +5 -0
  57. gr_libs-0.1.3.dist-info/top_level.txt +3 -0
  58. tutorials/graml_minigrid_tutorial.py +30 -0
  59. tutorials/graml_panda_tutorial.py +32 -0
  60. tutorials/graml_parking_tutorial.py +38 -0
  61. tutorials/graml_point_maze_tutorial.py +43 -0
  62. tutorials/graql_minigrid_tutorial.py +29 -0
gr_libs/ml/neural/deep_rl_learner.py
@@ -0,0 +1,395 @@
+ from collections import OrderedDict
+ import gc
+ from types import MethodType
+ from typing import List, Tuple
+ import gymnasium as gym
+ import numpy as np
+ import cv2
+
+ HACK_HAPPENED = False
+
+ if __name__ != "__main__":
+     from gr_libs.ml.utils.storage import get_agent_model_dir
+     from gr_libs.ml.utils.format import random_subset_with_order
+     from stable_baselines3 import SAC, PPO
+     from stable_baselines3.common.vec_env import DummyVecEnv
+     from gr_envs.custom_env_wrappers.flat_obs_wrapper import CombineAchievedGoalAndObservationWrapper
+
+     # important for registration of envs! do not remove
+     import gr_envs.maze_scripts.envs.maze
+     import gr_envs.highway_env_scripts.envs.parking_env
+     from gr_libs.ml.utils import device
+
+ # built-in python modules
+ import random
+ import os
+ import sys
+
+ def create_vec_env(kwargs):
+     # create the env; the model will not be a pretrained one anyway
+     # env = gym.make(**kwargs)
+     env = gym.make(**kwargs)
+     return DummyVecEnv([lambda: env])
+
+ def change_goal_to_specific_desired(obs, desired):
+     try:
+         if desired != None: obs['desired_goal'] = desired
+     except Exception as e:
+         try:
+             if all(desired != None): obs['desired_goal'] = desired
+         except Exception as e:
+             if all([desiredy != None for desiredish in desired for desiredy in desiredish]): obs['desired_goal'] = desired
+
+
+ NETWORK_SETUP = {
+     SAC: OrderedDict([('batch_size', 512), ('buffer_size', 100000), ('ent_coef', 'auto'), ('gamma', 0.95), ('learning_rate', 0.001), ('learning_starts', 5000), ('n_timesteps', 50000.0), ('normalize', "{'norm_obs': False, 'norm_reward': False}"), ('policy', 'MultiInputPolicy'), ('policy_kwargs', 'dict(net_arch=[64, 64])'), ('replay_buffer_class', 'HerReplayBuffer'), ('replay_buffer_kwargs', "dict( goal_selection_strategy='future', n_sampled_goal=4 )"), ('normalize_kwargs', {'norm_obs': False, 'norm_reward': False})]),
+     # "tqc": OrderedDict([('batch_size', 256), ('buffer_size', 1000000), ('ent_coef', 'auto'), ('env_wrapper', ['sb3_contrib.common.wrappers.TimeFeatureWrapper']), ('gamma', 0.95), ('learning_rate', 0.001), ('learning_starts', 1000), ('n_timesteps', 25000.0), ('normalize', False), ('policy', 'MultiInputPolicy'), ('policy_kwargs', 'dict(net_arch=[64, 64])'), ('replay_buffer_class', 'HerReplayBuffer'), ('replay_buffer_kwargs', "dict( goal_selection_strategy='future', n_sampled_goal=4 )"), ('normalize_kwargs',{'norm_obs':False,'norm_reward':False})]),
+     PPO: OrderedDict([('batch_size', 256), ('ent_coef', 0.01), ('gae_lambda', 0.9), ('gamma', 0.99), ('learning_rate', 'lin_0.0001'), ('max_grad_norm', 0.5), ('n_envs', 8), ('n_epochs', 20), ('n_steps', 8), ('n_timesteps', 25000.0), ('normalize_advantage', False), ('policy', 'MultiInputPolicy'), ('policy_kwargs', 'dict(log_std_init=-2, ortho_init=False)'), ('use_sde', True), ('vf_coef', 0.4), ('normalize', False), ('normalize_kwargs', {'norm_obs': False, 'norm_reward': False})]),
+ }
+
+ class DeepRLAgent():
+     def __init__(self, domain_name: str, problem_name: str, num_timesteps: float, algorithm=SAC, reward_threshold: float=450,
+                  exploration_rate=None):
+         # The reward threshold needs to change according to the task the agent is training on, because it differs from task to task.
+         kwargs = {"id": problem_name, "render_mode": "rgb_array"}
+
+         self.domain_name = domain_name
+         self.problem_name = problem_name
+
+         self._model_directory = get_agent_model_dir(domain_name=self.domain_name, model_name=problem_name, class_name=algorithm.__name__)
+         if os.path.exists(os.path.join(self._model_directory, "saved_model.zip")):
+             self.pre_trained_model = True
+             self._model_file_path = os.path.join(self._model_directory, "saved_model.zip")
+         else:
+             self.pre_trained_model = False
+             self.env = create_vec_env(kwargs)
+             self._actions_space = self.env.action_space
+             if exploration_rate != None: self._model = algorithm("MultiInputPolicy", self.env, ent_coef=exploration_rate, verbose=1)
+             else: self._model = algorithm("MultiInputPolicy", self.env, verbose=1)
+             self._model_file_path = os.path.join(self._model_directory, "saved_model.pth")
+         self.algorithm = algorithm
+         self.reward_threshold = reward_threshold
+         self.num_timesteps = num_timesteps
+
+     def save_model(self):
+         self._model.save(self._model_file_path)
+
+     def record_video(self, video_path, desired=None):
+         """Record a video of the agent's performance."""
+         global HACK_HAPPENED
+         fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+         fps = 30.0
+         # if is_gc:
+         #     assert goal_idx != None
+         #     self.reset_with_goal_idx(goal_idx)
+         # else:
+         #     assert goal_idx == None
+         self.env.reset()
+         frame_size = (self.env.render(mode='rgb_array').shape[1], self.env.render(mode='rgb_array').shape[0])
+         video_path = os.path.join(video_path, "plan_video.mp4")
+         video_writer = cv2.VideoWriter(video_path, fourcc, fps, frame_size)
+         general_done, success_done = False, False
+         gc.collect()
+         obs = self.env.reset()
+         change_goal_to_specific_desired(obs, desired)
+         counter = 0
+         while not (general_done or success_done):
+             counter += 1
+             action, _states = self._model.predict(obs, deterministic=False)
+             obs, rewards, general_done, info = self.env.step(action)
+             if isinstance(general_done, np.ndarray): general_done = general_done[0]
+             change_goal_to_specific_desired(obs, desired)
+             if "success" in info[0].keys(): success_done = info[0]["success"]  # make sure the agent actually reached the goal within the max time
+             elif "is_success" in info[0].keys(): success_done = info[0]["is_success"]  # make sure the agent actually reached the goal within the max time
+             elif "step_task_completions" in info[0].keys(): success_done = (len(info[0]["step_task_completions"]) == 1)  # bug of DummyVecEnv: it removes episode_task_completions from the info dict.
+             else: raise NotImplementedError("no other option for any of the environments.")
+             frame = self.env.render()
+             success_done = self.change_done_by_specific_desired(obs, desired, success_done)
+             video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
+             if general_done == False != success_done == True:
+                 assert HACK_HAPPENED
+             elif general_done == True != success_done == False:
+                 raise Exception("general_done is true but success_done is false")
+         self.env.close()
+         video_writer.release()
+
+     # def set_success_done(self, success_done, desired, )
+
+     def change_done_by_specific_desired(self, obs, desired, old_success_done):
+         global HACK_HAPPENED
+         try:
+             if desired != None:
+                 HACK_HAPPENED = True
+                 if 'Panda' in self.problem_name:
+                     assert obs['achieved_goal'].shape == desired.shape
+                     d = np.linalg.norm(obs['achieved_goal'] - desired, axis=-1)
+                     # print(f"achieved_goal:{achieved_goal}, desired_goal:{desired_goal}, distance:{d}, is finished:{d < self.distance_threshold}")
+                     return (d < 0.04)[0]
+                 elif 'Parking' in self.problem_name:  # shouldn't be used for now
+                     # TODO
+                     return self.env.task.is_success()
+                 else:
+                     return old_success_done
+         except Exception as e:
+             try:
+                 if all(desired != None):
+                     HACK_HAPPENED = True
+                     if 'Panda' in self.problem_name:
+                         assert obs['achieved_goal'].shape == desired.shape
+                         d = np.linalg.norm(obs['achieved_goal'] - desired, axis=-1)
+                         # print(f"achieved_goal:{achieved_goal}, desired_goal:{desired_goal}, distance:{d}, is finished:{d < self.distance_threshold}")
+                         return (d < 0.04)[0]
+                     elif 'Parking' in self.problem_name:
+                         # TODO add all of this to the environment property. the recognizer shouldn't know anything about it.
+                         return self.env.task.is_success()
+                     else:
+                         return old_success_done
+             except Exception as e:
+                 if all([desiredy != None for desiredish in desired for desiredy in desiredish]):
+                     HACK_HAPPENED = True
+                     if 'Panda' in self.problem_name:
+                         assert obs['achieved_goal'].shape == desired.shape
+                         d = np.linalg.norm(obs['achieved_goal'] - desired, axis=-1)
+                         # print(f"achieved_goal:{achieved_goal}, desired_goal:{desired_goal}, distance:{d}, is finished:{d < self.distance_threshold}")
+                         return (d < 0.04)[0]
+                     elif 'Parking' in self.problem_name:
+                         # TODO
+                         return self.env.task.is_success()
+                     else:
+                         return old_success_done
+
+     def load_model(self):
+         self._model = self.algorithm.load(self._model_file_path, env=self.env, device=device)
+
+     def learn(self):
+         if os.path.exists(self._model_file_path):
+             print(f"Loading pre-existing model in {self._model_file_path}")
+             if self.pre_trained_model:
+                 def test(env):
+                     obs = env.reset()
+                     lstm_states = None
+                     episode_start = np.ones((1,), dtype=bool)
+                     deterministic = True
+                     episode_reward = 0.0
+                     ep_len = 0
+                     generator = range(5000)
+                     for i in generator:
+                         # print(f"iteration {i}:{obs=}")
+                         action, lstm_states = self._model.predict(
+                             obs,  # type: ignore[arg-type]
+                             state=lstm_states,
+                             episode_start=episode_start,
+                             deterministic=deterministic,
+                         )
+                         obs, reward, done, infos = env.step(action)
+
+                         assert len(reward) == 1, f"length of rewards list is not 1, rewards:{reward}"
+                         if "success" in infos[0].keys(): is_success = infos[0]["success"]  # make sure the agent actually reached the goal within the max time
+                         elif "is_success" in infos[0].keys(): is_success = infos[0]["is_success"]  # make sure the agent actually reached the goal within the max time
+                         elif "step_task_completions" in infos[0].keys(): is_success = (len(infos[0]["step_task_completions"]) == 1)  # bug of DummyVecEnv: it removes episode_task_completions from the info dict.
+                         else: raise NotImplementedError("no other option for any of the environments.")
+                         # print(f"(action,is_done,info):({action},{done},{infos})")
+                         if is_success:
+                             # print(f"breaking due to GG, took {i} steps")
+                             break
+                         episode_start = done
+
+                         episode_reward += reward[0]
+                         ep_len += 1
+                     env.close()
+                 custom_objects = {
+                     "learning_rate": 0.0,
+                     "lr_schedule": lambda _: 0.0,
+                     "clip_range": lambda _: 0.0,
+                 }
+                 kwargs = {"id": self.problem_name, "render_mode": "rgb_array"}
+                 self.env = create_vec_env(kwargs)
+                 self._actions_space = self.env.action_space
+                 kwargs = {'seed': 0, 'buffer_size': 1}
+
+                 self._model = self.algorithm.load(self._model_file_path, env=self.env, custom_objects=custom_objects, device=device, **kwargs)
+                 test(self.env)
+             else:
+                 self.load_model()
+         else:
+             # Stop training when the model reaches the reward threshold
+             # callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=self.reward_threshold, verbose=1)
+             # eval_callback = EvalCallback(self.env, best_model_save_path="./logs/",
+             #                              log_path="./logs/", eval_freq=500, callback_on_new_best=callback_on_best, verbose=1, render=True)
+             # self._model.learn(total_timesteps=self.num_timesteps, progress_bar=True, callback=eval_callback)
+             print(f"No existing model in {self._model_file_path}, starting learning")
+             self._model.learn(total_timesteps=self.num_timesteps, progress_bar=True)  # comment this in a normal env
+             self.save_model()
+
+     def get_mean_and_std_dev(self, observation):
+         if self.algorithm == SAC:
+             tensor_observation, _ = self._model.actor.obs_to_tensor(observation)
+
+             mean_actions, log_std_dev, kwargs = self._model.actor.get_action_dist_params(tensor_observation)
+             probability_dist = self._model.actor.action_dist.proba_distribution(
+                 mean_actions=mean_actions,
+                 log_std=log_std_dev
+             )
+             actor_means = probability_dist.get_actions(True).cpu().detach().numpy()
+             log_std_dev = log_std_dev.cpu().detach().numpy()
+         elif self.algorithm == PPO:
+             self._model.policy.set_training_mode(False)
+             tensor_observation, _ = self._model.policy.obs_to_tensor(observation)
+             distribution = self._model.policy.get_distribution(tensor_observation)
+
+             actor_means = distribution.distribution.mean.cpu().detach().numpy()
+             log_std_dev = distribution.distribution.stddev.cpu().detach().numpy()
+             if isinstance(self._model.policy.action_space, gym.spaces.Box):
+                 actor_means = np.clip(
+                     actor_means,
+                     self._model.policy.action_space.low,
+                     self._model.policy.action_space.high
+                 )
+             return actor_means, log_std_dev
+         else:
+             assert False
+         return actor_means, log_std_dev
+
+     # Fits agents whose observations are lists of tuples, each tuple a single step/frame of size 2, comprised of obs and action.
+     # The function squashes the 2D arrays of obs and action into a 1D array, concatenating their values for training.
+     def simplify_observation(self, observation):
+         return [np.concatenate((np.array(obs).reshape(obs.shape[-1]), np.array(action[0]).reshape(action[0].shape[-1]))) for (obs, action) in observation]
+
+     def generate_partial_observation(self, action_selection_method, percentage, is_consecutive, save_fig=False, fig_path=None, random_optimalism=True):
+         steps = self.generate_observation(action_selection_method, save_fig=save_fig, random_optimalism=random_optimalism, fig_path=fig_path)  # steps are a full observation
+         return random_subset_with_order(steps, (int)(percentage * len(steps)), is_consecutive)
+
+     def generate_observation(self, action_selection_method: MethodType, random_optimalism, save_fig=False, env_prop=None,
+                              fig_path=None, with_dict=False, desired=None) -> List[Tuple[np.ndarray, np.ndarray]]:  # TODO make sure to add a linter to alert when a method doesn't accept or return the type it should
+         if save_fig == False:
+             assert fig_path == None, "You can't specify a vid path when you don't even save the figure."
+         else:
+             assert fig_path != None, "You need to specify a vid path when you save the figure."
+         try:
+             obs = self.env.reset()
+             change_goal_to_specific_desired(obs, desired)
+         except Exception as e:
+             kwargs = {"id": self.problem_name, "render_mode": "rgb_array"}
+             self.env = create_vec_env(kwargs)
+             obs = self.env.reset()
+             change_goal_to_specific_desired(obs, desired)
+         observations = []
+         is_successful_observation_made = False
+         num_of_insuccessful_attempts = 0
+         while not is_successful_observation_made:
+             is_successful_observation_made = True  # start as true; set back to False on crash/death/truncation instead of success
+             if random_optimalism:
+                 constant_initial_action = self.env.action_space.sample()
+             while True:
+                 from gr_libs.metrics.metrics import stochastic_amplified_selection
+                 deterministic = action_selection_method != stochastic_amplified_selection
+                 action, _states = self._model.predict(obs, deterministic=deterministic)
+                 if random_optimalism:  # get the right direction and then start inserting noise to still get a relatively optimal plan
+                     if len(observations) > 3:
+                         for i in range(0, len(action[0])):
+                             action[0][i] += random.uniform(-0.01 * action[0][i], 0.01 * action[0][i])
+                     else:  # just walk in a specific random direction to enable diverse plans
+                         action = np.array(np.array([constant_initial_action]), None)
+                 if with_dict: observations.append((obs, action))
+                 else: observations.append((obs['observation'], action))
+                 obs, reward, done, info = self.env.step(action)
+                 change_goal_to_specific_desired(obs, desired)
+                 if isinstance(done, np.ndarray): general_done = done[0]
+                 else: general_done = done
+                 if "success" in info[0].keys(): success_done = info[0]["success"]
+                 elif "is_success" in info[0].keys(): success_done = info[0]["is_success"]
+                 elif "step_task_completions" in info[0].keys(): success_done = info[0]["step_task_completions"]
+                 else: raise NotImplementedError("no other option for any of the environments.")
+                 success_done = self.change_done_by_specific_desired(obs, desired, success_done)
+                 if general_done == True and success_done == False:
+                     # it could be that the stochasticity inserted into the actions made the agent die/crash. we don't want this observation.
+                     num_of_insuccessful_attempts += 1
+                     # print(f"for agent for problem {self.problem_name}, it's done {len(observations)} steps and reached a situation where general_done != success_done, for the {num_of_insuccessful_attempts} time.")
+                     if num_of_insuccessful_attempts > 50:
+                         # print("got more than 50 unsuccessful attempts")
+                         assert general_done == success_done, f"failed on goal: {obs['desired']}"  # we want to make sure the episode is done only when the agent has actually succeeded with the task.
+                     else:
+                         # try again by breaking the inner loop. everything is set up to be like the beginning of the function.
+                         is_successful_observation_made = False
+                         try:
+                             obs = self.env.reset()
+                             change_goal_to_specific_desired(obs, desired)
+                         except Exception as e:
+                             kwargs = {"id": self.problem_name, "render_mode": "rgb_array"}
+                             self.env = create_vec_env(kwargs)
+                             obs = self.env.reset()
+                             change_goal_to_specific_desired(obs, desired)
+                         observations = []  # we want to re-accumulate the observations from scratch, have another try
+                         break
+                 elif general_done == False and success_done == False:
+                     continue
+                 elif general_done == True and success_done == True:
+                     if num_of_insuccessful_attempts > 0:
+                         pass  # print(f"after {num_of_insuccessful_attempts}, finally I succeeded!")
+                     break
+                 elif general_done == False and success_done == True:
+                     assert HACK_HAPPENED == True  # happens only if the hack happened
+                     break
+         # self.env.close()
+         if save_fig:
+             num_tries = 0
+             while True:
+                 if num_tries >= 10:
+                     assert False, "agent keeps failing on recording an optimal obs."
+                 try:
+                     self.record_video(fig_path, desired)
+                     break
+                 except Exception as e:
+                     num_tries += 1
+             # print(f"sequence to {self.problem_name} is:\n\t{steps}\ngenerating image at {img_path}.")
+             print(f"generated sequence video at {fig_path}.")
+         self.env.close()
+         return observations
+
+     # def reset_with_goal_idx(self, goal_idx):
+     #     self.env.set_options({"goal_idx": goal_idx})
+     #     return self.env.reset()
+
+ class GCDeepRLAgent(DeepRLAgent):
+     def generate_partial_observation(self, action_selection_method, percentage, is_consecutive, goal_directed_problem=None, goal_directed_goal=None, save_fig=False, fig_path=None, random_optimalism=True):
+         steps = self.generate_observation(action_selection_method, save_fig=save_fig, fig_path=fig_path, random_optimalism=random_optimalism, goal_directed_problem=goal_directed_problem, goal_directed_goal=goal_directed_goal)  # steps are a full observation
+         return random_subset_with_order(steps, (int)(percentage * len(steps)), is_consecutive)
+
+     def generate_observation(self, action_selection_method: MethodType, random_optimalism, env_prop=None, goal_directed_problem=None, goal_directed_goal=None,
+                              save_fig=False, fig_path=None, with_dict=False):
+         # print(f"hyperparams:{hyperparams}")
+         if goal_directed_problem:
+             if save_fig:
+                 assert fig_path != None, "You need to specify a vid path when you save the figure."
+             else:
+                 assert fig_path == None
+             assert goal_directed_goal == None, "can't give a goal-directed goal and also a goal-directed problem for the sake of sequence generation by a general agent"
+             kwargs = {"id": goal_directed_problem, "render_mode": "rgb_array"}
+             self.env = create_vec_env(kwargs)
+             orig_env = self.env
+             observations = super().generate_observation(action_selection_method=action_selection_method, random_optimalism=random_optimalism,
+                                                         save_fig=save_fig, fig_path=fig_path, with_dict=with_dict)
+             self.env = orig_env
+         else:  # goal_directed_goal != None
+             if save_fig:
+                 assert fig_path != None, "You need to specify a vid path when you save the figure."
+             else:
+                 assert fig_path == None
+             assert goal_directed_problem == None, "can't give a goal-directed goal and also a goal-directed problem for the sake of sequence generation by a general agent"
+             observations = super().generate_observation(action_selection_method=action_selection_method, random_optimalism=random_optimalism,
+                                                         save_fig=save_fig, fig_path=fig_path, with_dict=with_dict, desired=goal_directed_goal)  # TODO tutorial on how to use the DeepRLAgent for sequence generation, examination and plotting of the sequence
+         return observations
+
+
+ if __name__ == "__main__":
+     package_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+     print("this is package root:" + package_root)
+     if package_root not in sys.path:
+         sys.path.insert(0, package_root)
+
+     from gr_libs.ml.utils.storage import get_agent_model_dir, set_global_storage_configs
+
+     set_global_storage_configs("graml", "fragmented_partial_obs", "inference_same_length", "learn_diff_length")
+     agent = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-9x1", algorithm=SAC, num_timesteps=200000)
+     agent.learn()
+     agent.record_video("")
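
For orientation, a minimal usage sketch of the agent class above (an illustration, not part of the packaged file): it mirrors the `__main__` block and uses `stochastic_amplified_selection`, which `generate_observation` already imports internally.

    from stable_baselines3 import SAC
    from gr_libs.ml.utils.storage import set_global_storage_configs
    from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
    from gr_libs.metrics.metrics import stochastic_amplified_selection

    # Mirror the __main__ block: configure storage, train (or load a cached model),
    # then roll out one successful trajectory as a list of (observation, action) pairs.
    set_global_storage_configs("graml", "fragmented_partial_obs", "inference_same_length", "learn_diff_length")
    agent = DeepRLAgent(domain_name="point_maze",
                        problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-9x1",
                        algorithm=SAC, num_timesteps=200000)
    agent.learn()
    trajectory = agent.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # small action noise after the first few steps, for plan diversity
    )
    flat_steps = agent.simplify_observation(trajectory)  # one flat obs+action vector per step

With the default `with_dict=False`, each step stores `obs['observation']`, so `simplify_observation` can concatenate observation and action into a single vector per step.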
gr_libs/ml/neural/utils/__init__.py
@@ -0,0 +1,2 @@
+ from gr_libs.ml.neural.utils.dictlist import DictList
+ from gr_libs.ml.neural.utils.penv import ParallelEnv
gr_libs/ml/neural/utils/dictlist.py
@@ -0,0 +1,33 @@
+ class DictList(dict):
+     """A dictionary of lists of the same size. Dictionary items can be
+     accessed using `.` notation and list items using `[]` notation.
+
+     Example:
+         >>> d = DictList({"a": [[1, 2], [3, 4]], "b": [[5], [6]]})
+         >>> d.a
+         [[1, 2], [3, 4]]
+         >>> d[0]
+         DictList({"a": [1, 2], "b": [5]})
+     """
+
+     __getattr__ = dict.__getitem__
+     __setattr__ = dict.__setitem__
+
+     def __len__(self):
+         return len(next(iter(dict.values(self))))
+
+     def __getitem__(self, index):
+         return DictList({key: value[index] for key, value in dict.items(self)})
+
+     def __setitem__(self, index, d):
+         for key, value in d.items():
+             dict.__getitem__(self, key)[index] = value
+
+     def __reduce__(self):
+         # Custom serialization method for dill
+         return (DictList, (dict(self),))  # serialize as (DictList, (dict(self),))
+
+     def __setstate__(self, state):
+         # Custom deserialization method for dill
+         data, = state
+         self.update(data)
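
Beyond the doctest above, a short sketch of how the remaining methods behave, assuming the import surface from `gr_libs.ml.neural.utils` shown earlier: `len` returns the common per-key list length, and item assignment writes back into every underlying list.

    from gr_libs.ml.neural.utils import DictList

    d = DictList({"a": [[1, 2], [3, 4]], "b": [[5], [6]]})
    print(len(d))       # 2: the common length of the per-key lists
    print(d.a)          # [[1, 2], [3, 4]]: attribute access maps to a dict lookup
    print(d[1])         # DictList with {'a': [3, 4], 'b': [6]}: integer indexing slices every list
    d[0] = {"a": [9, 9], "b": [7]}   # writes index 0 of each underlying list in place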
gr_libs/ml/neural/utils/penv.py
@@ -0,0 +1,57 @@
+ import multiprocessing
+ import gymnasium as gym
+
+ # multiprocessing.set_start_method("fork")
+
+
+ def worker(conn, env):
+     while True:
+         cmd, data = conn.recv()
+         if cmd == "step":
+             obs, reward, terminated, truncated, info = env.step(data)
+             if terminated or truncated:
+                 obs, _ = env.reset()
+             conn.send((obs, reward, terminated, truncated, info))
+         elif cmd == "reset":
+             obs, _ = env.reset()
+             conn.send(obs)
+         else:
+             raise NotImplementedError
+
+
+ class ParallelEnv(gym.Env):
+     """A concurrent execution of environments in multiple processes."""
+
+     def __init__(self, envs):
+         assert len(envs) >= 1, "No environment given."
+
+         self.envs = envs
+         self.observation_space = self.envs[0].observation_space
+         self.action_space = self.envs[0].action_space
+
+         self.locals = []
+         for env in self.envs[1:]:
+             local, remote = multiprocessing.Pipe()
+             self.locals.append(local)
+             p = multiprocessing.Process(target=worker, args=(remote, env))
+             p.daemon = True
+             p.start()
+             remote.close()
+
+     def reset(self):
+         for local in self.locals:
+             local.send(("reset", None))
+         results = [self.envs[0].reset()[0]] + [local.recv() for local in self.locals]
+         return results
+
+     def step(self, actions):
+         for local, action in zip(self.locals, actions[1:]):
+             local.send(("step", action))
+         obs, reward, terminated, truncated, info = self.envs[0].step(actions[0])
+         if terminated or truncated:
+             obs, _ = self.envs[0].reset()
+         results = zip(*[(obs, reward, terminated, truncated, info)] + [local.recv() for local in self.locals])
+         return results
+
+     def render(self):
+         raise NotImplementedError
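
A minimal sketch of driving `ParallelEnv` with a batch of gymnasium environments (the environment id is illustrative; the worker processes assume a fork-style start method, as the commented-out line above suggests): environment 0 is stepped in the calling process while the others are stepped in child processes.

    import gymnasium as gym
    from gr_libs.ml.neural.utils import ParallelEnv

    # One environment per desired worker; the environment id is only an example.
    envs = [gym.make("CartPole-v1") for _ in range(4)]
    penv = ParallelEnv(envs)

    obs_list = penv.reset()  # one initial observation per environment
    actions = [env.action_space.sample() for env in envs]
    obs, rewards, terminated, truncated, infos = penv.step(actions)

`step` returns the transposed per-environment results, so each of the five unpacked values holds one entry per environment.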