gr-libs 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
+import importlib.metadata
+import warnings
+
+def is_extra_installed(package: str, extra: str) -> bool:
+    """Check if an extra was installed for a given package."""
+    try:
+        # Get metadata for the installed package
+        dist = importlib.metadata.metadata(package)
+        requires = dist.get_all("Requires-Dist", []) # Dependencies listed in the package metadata
+        return any(extra in req for req in requires)
+    except importlib.metadata.PackageNotFoundError:
+        return False # The package is not installed
+
+# Check if `gr_libs[minigrid]` was installed
+for env in ["minigrid", "panda", "parking", "point_maze"]:
+    if is_extra_installed("gr_libs", f"gr_envs[{env}]"):
+        try:
+            importlib.import_module(f"gr_envs.{env}_scripts.envs")
+        except ImportError:
+            raise ImportError(f"gr_libs[{env}] was not installed, but gr_libs[{env}] requires it! if you messed with gr_libs installation, you can reinstall gr_libs.")
+    else:
+        warnings.warn(f"gr_libs[{env}] was not installed, skipping {env} imports.", RuntimeWarning)
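Because this new gr_libs/environment/__init__.py runs at import time, a missing optional environment surfaces as a RuntimeWarning rather than an error. A minimal sketch of how a caller could observe that, assuming gr_libs 0.1.5 is installed (the filtering and printing below are illustrative, not part of the package):

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    import gr_libs.environment  # triggers the extras check shown in the hunk above

for w in caught:
    if issubclass(w.category, RuntimeWarning):
        print(f"optional environment not available: {w.message}")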
@@ -168,8 +168,6 @@ class PandaProperty(GCEnvProperty):
 
 
 class ParkingProperty(GCEnvProperty):
-    # def str_to_goal(self): # TODO not use it, goal is not a part of the env property anymore.
-    #     return self.name.split("-")[-2]
 
     def __init__(self, name):
         super().__init__(name)
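For reference, the commented-out helper removed here read the parking goal straight out of the problem name. A standalone sketch of that parsing, for illustration only (the function name is hypothetical, and goal handling is no longer part of the env property):

def parking_goal_from_problem_name(name: str) -> str:
    # Same split the removed snippet used: the goal index is the
    # second-to-last dash-separated token of the problem name.
    return name.split("-")[-2]

print(parking_goal_from_problem_name("Parking-S-14-PC--GI-11-v0"))  # -> "11"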
@@ -13,11 +13,6 @@ if __name__ != "__main__":
     from gr_libs.ml.utils.format import random_subset_with_order
     from stable_baselines3 import SAC, PPO
     from stable_baselines3.common.vec_env import DummyVecEnv
-    from gr_envs.custom_env_wrappers.flat_obs_wrapper import CombineAchievedGoalAndObservationWrapper
-
-    # important for registration of envs! do not remove lad
-    import gr_envs.maze_scripts.envs.maze
-    import gr_envs.highway_env_scripts.envs.parking_env
     from gr_libs.ml.utils import device
 
     # built-in python modules
@@ -32,13 +27,15 @@ def create_vec_env(kwargs):
     return DummyVecEnv([lambda: env])
 
 def change_goal_to_specific_desired(obs, desired):
-    try:
-        if desired!=None: obs['desired_goal'] = desired
-    except Exception as e:
-        try:
-            if all(desired!=None): obs['desired_goal'] = desired
-        except Exception as e:
-            if all([desiredy!=None for desiredish in desired for desiredy in desiredish]): obs['desired_goal'] = desired
+    if desired is not None:
+        obs['desired_goal'] = desired
+    # try:
+    #     if desired!=None: obs['desired_goal'] = desired
+    # except Exception as e:
+    #     try:
+    #         if all(desired!=None): obs['desired_goal'] = desired
+    #     except Exception as e:
+    #         if all([desiredy!=None for desiredish in desired for desiredy in desiredish]): obs['desired_goal'] = desired
 
 
 NETWORK_SETUP = {
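The rewritten change_goal_to_specific_desired simply overwrites the desired goal in place and is a no-op when no goal is given, replacing the nested try/except chain. A minimal sketch of that behavior on a goal-conditioned observation dict (shapes and values here are illustrative):

import numpy as np

def change_goal_to_specific_desired(obs, desired):
    # 0.1.5 behavior: overwrite in place, do nothing on None.
    if desired is not None:
        obs['desired_goal'] = desired

obs = {"observation": np.zeros(6), "achieved_goal": np.zeros(3), "desired_goal": np.zeros(3)}
change_goal_to_specific_desired(obs, np.array([[-0.1, 0.1, 0.1]]))  # desired_goal replaced
change_goal_to_specific_desired(obs, None)                          # obs left unchanged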
@@ -265,6 +262,7 @@ class DeepRLAgent():
             assert fig_path == None, "You can't specify a vid path when you don't even save the figure."
         else:
             assert fig_path != None, "You need to specify a vid path when you save the figure."
+        # The try-except is a bug fix for the env not being reset properly in panda. If someone wants to check why and provide a robust solution they're welcome.
         try:
             obs = self.env.reset()
             change_goal_to_specific_desired(obs, desired)
@@ -103,7 +103,6 @@ class Graml(LearningRecognizer):
         self.plans_dict[f"{true_goal}_true"] = true_sequence
 
         with open(embeddings_path + f'/{true_goal}_{percentage}_plans_dict.pkl', 'wb') as plans_file:
-            # TODO erase AGENT_BASED macros
             to_dump = {}
             for goal, obss in self.plans_dict.items():
                 if goal == f"{true_goal}_true":
@@ -243,7 +242,7 @@ class GCGraml(Graml, GaAdaptingRecognizer):
         if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
         gc_agent = self.rl_agent_type(**kwargs)
         gc_agent.learn()
-        self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent)) # TODO change
+        self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))
 
     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
         problem_name = self.env_prop.goal_to_problem_str(goal)
@@ -18,7 +18,7 @@ class Recognizer(ABC):
     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
         pass
 
-class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+class LearningRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
@@ -26,7 +26,7 @@ class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheri
         self.original_train_configs = train_configs
 
 # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
-class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+class GaAgentTrainerRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
@@ -37,7 +37,7 @@ class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the
     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
         super().domain_learning_phase(base_goals, train_configs)
 
-class GaAdaptingRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+class GaAdaptingRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gr_libs
-Version: 0.1.4
+Version: 0.1.5
 Summary: Package with goal recognition frameworks baselines
 Author: Ben Nageris
 Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -8,8 +8,8 @@ evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icw
 evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
 evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
 gr_libs/__init__.py,sha256=-uKsQiHIL7yojbDwlTR-I8sj1WX9XT52PoFbPjtUTKo,145
-gr_libs/environment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/environment/environment.py,sha256=0-3kZJCmLMF9o0NignZaMEwQb94NZQ2gmsOyfjPXKDI,6919
+gr_libs/environment/__init__.py,sha256=oxEKmdvzQLKbbMxedqEf3bGsSJvp2XL9Bxr5JEO6a5o,1038
+gr_libs/environment/environment.py,sha256=Ca9m6W8KEt4le0HFSAUvSHW5lSHg_wwJaBqFSq1KlUg,6781
 gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
 gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -20,7 +20,7 @@ gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
 gr_libs/ml/base/__init__.py,sha256=MfIYhl_UqH8H7YoTCih8wBFA_gpTOUFq8Ph0_Nq0XQk,68
 gr_libs/ml/base/rl_agent.py,sha256=u9rnb-ma9iDM5b_BlwjcTJGSFezIGrxXINw6b-Dbl8s,1598
 gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
-gr_libs/ml/neural/deep_rl_learner.py,sha256=_d6LdbMPqN4qJlOI_UqSD7o0yzIa7EjRONdFSVYO_Ag,20677
+gr_libs/ml/neural/deep_rl_learner.py,sha256=b41_b4GVlYqxhjrr1_YMcGdU9iwcMXsf3zH8D2kEucs,20659
 gr_libs/ml/neural/utils/__init__.py,sha256=bJgPfRnmfDQxdnb0OyRGwzgebEc1PnlO7-GpqszPBcc,106
 gr_libs/ml/neural/utils/dictlist.py,sha256=WpHfdWpVZ_T3PcSnOQUC--ro_tsS0dvam2WG3LcsHDw,1039
 gr_libs/ml/neural/utils/penv.py,sha256=R1uW8sePQqvTlJjpAuMx16eDU6TuGAjQF3hTR1QasMo,1862
@@ -43,21 +43,21 @@ gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
 gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
 gr_libs/ml/utils/storage.py,sha256=oCdvL_ypCglnSJsyyXzNyV_UJASTfioa3yJhFlFso64,4277
 gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/recognizer/recognizer.py,sha256=ysJYOGe5OlERMAeMwclKpwqw2tQvbSvGnLZrq4qP0xk,1895
+gr_libs/recognizer/recognizer.py,sha256=2lMlJNE7X13Y3FMpNfql-WAYw23NImIiomnVmCI75bM,1706
 gr_libs/recognizer/recognizer_doc.md,sha256=RnTvbZhl2opvU7-QT4pULCV5HCdJTw2dsu8WQOOiR3E,2521
 gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
 gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
-gr_libs/recognizer/graml/graml_recognizer.py,sha256=OblODjvWSi8KZ5ByvGyL5Mm1GY3IZb8yspfk81Dbebg,15721
+gr_libs/recognizer/graml/graml_recognizer.py,sha256=SGs7rtkA73lbCv9HISa6dfjVUJUhlH54QriVsoGVRss,15672
 gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
 gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
-tutorials/graml_minigrid_tutorial.py,sha256=iLs7mOYNAZ5wtxtSwiE8tvbLMIueQ5VmVmDnBBjWG_4,1589
-tutorials/graml_panda_tutorial.py,sha256=DuHVDLe49qwgWouLxwalqdT1P4dlNOOMdgDc3ocNX5Y,1820
-tutorials/graml_parking_tutorial.py,sha256=sQ496DNuAo9GZ_0iUZ_6Hqe5zFxIYZ_pBIHQscQvR4o,2501
-tutorials/graml_point_maze_tutorial.py,sha256=TnLT9FdDj6AF8lm0lDIZum4ouPE5rye4RBH8z4Exj2Y,2713
+tutorials/graml_minigrid_tutorial.py,sha256=0jSlsKd0H3DXA7rPSnw09y56pTSXvtXCFOKSuUvfDjs,1597
+tutorials/graml_panda_tutorial.py,sha256=LwNQPb7Kdg7X8jY7Zk13-8uBfDP8LMNzwgH-u3KOcjw,1861
+tutorials/graml_parking_tutorial.py,sha256=fsLbASIESUGnZe09eEhMcqxpU0NP8k1IQyGgJq_AFVs,1549
+tutorials/graml_point_maze_tutorial.py,sha256=gY8GCHnq32xyY7gSw3i3DL98TlfwgMkhO17csyV2QBA,1631
 tutorials/graql_minigrid_tutorial.py,sha256=VoXbEgL_hjQLfau6WohXxPK8rrv1VLA874F8PZ7ZtPk,1421
-gr_libs-0.1.4.dist-info/METADATA,sha256=wH7aEvKh4kRTXBs75uPSBW87s23dgcsKRqgkVSwKDQc,8905
-gr_libs-0.1.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-gr_libs-0.1.4.dist-info/top_level.txt,sha256=k7_l789QSJEr9JrtvsRMxNoTIDwNduq8mhIN-YoPJUM,29
-gr_libs-0.1.4.dist-info/RECORD,,
+gr_libs-0.1.5.dist-info/METADATA,sha256=h4QUMjuxouD3o1iKg2F3doJbKLbezkV1FaGKm1oBL0o,8905
+gr_libs-0.1.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+gr_libs-0.1.5.dist-info/top_level.txt,sha256=k7_l789QSJEr9JrtvsRMxNoTIDwNduq8mhIN-YoPJUM,29
+gr_libs-0.1.5.dist-info/RECORD,,
@@ -1,11 +1,11 @@
-from gr_libs.environment.environment import QLEARNING
+from gr_libs.environment.environment import MINIGRID, QLEARNING
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import ExpertBasedGraml
 
 recognizer = ExpertBasedGraml(
-    domain_name="minigrid",
+    domain_name=MINIGRID,
     env_name="MiniGrid-SimpleCrossingS13N4"
 )
 
@@ -1,7 +1,8 @@
 
 import numpy as np
 from stable_baselines3 import PPO, SAC
-from gr_libs.environment.environment import PANDA, GCEnvProperty, PandaProperty
+import gr_libs.environment.environment
+from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
@@ -12,9 +13,13 @@ recognizer = GCGraml( # TODO make these tutorials into pytests
     domain_name=PANDA,
     env_name="PandaMyReachDense"
 )
-recognizer.domain_learning_phase(base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
-                                 train_configs=[(SAC, 800000)])
-recognizer.goals_adaptation_phase(dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])])
+recognizer.domain_learning_phase(
+    base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
+    train_configs=[(SAC, 800000)]
+)
+recognizer.goals_adaptation_phase(
+    dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
+)
 # TD3 is different from recognizer and expert algorithms, which are SAC #
 property_type = domain_to_env_property(PANDA)
 env_property = property_type("PandaMyReachDense")
@@ -1,31 +1,27 @@
 
 from stable_baselines3 import PPO, SAC, TD3
-from gr_libs.environment.environment import EnvProperty, GCEnvProperty, ParkingProperty
+from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
 
-# Consider extracting all these to "default point_maze (or every other domain) variables" module which would simplify things like the problem_list_to_str_tuple function, sizes of inputs, etc.
 recognizer = GCGraml(
-    env_name="parking", # TODO change to macros which are importable from some info or env module of enums.
-    problems=[ParkingProperty("parking-v0")],
-    train_configs=[(PPO, 400000)],
-    gc_goal_set=[f"Parking-S-14-PC--GI-{i}-v0" for i in range(1,21)]
+    domain_name=PARKING,
+    env_name="Parking-S-14-PC-"
+)
+
+recognizer.domain_learning_phase(
+    [i for i in range(1,21)],
+    [(PPO, 200000)]
 )
-recognizer.domain_learning_phase()
 recognizer.goals_adaptation_phase(
-    dynamic_goals_problems = [ParkingProperty(p) for p in ["Parking-S-14-PC--GI-1-v0",
-                                                           "Parking-S-14-PC--GI-4-v0",
-                                                           "Parking-S-14-PC--GI-8-v0",
-                                                           "Parking-S-14-PC--GI-11-v0",
-                                                           "Parking-S-14-PC--GI-14-v0",
-                                                           "Parking-S-14-PC--GI-18-v0",
-                                                           "Parking-S-14-PC--GI-21-v0"]] # TODO detach the goal from the environment instance in every gym env, add the ability to alter it from outside.
-    #dynamic_train_configs=[(SAC, 400000) for _ in range(7)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
+    dynamic_goals = ["1", "11", "21"]
+    # no need for expert sequence generation since GCRL is used
 )
+
 # TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = DeepRLAgent(env_name="parking", problem_name="Parking-S-14-PC--GI-8-v0", algorithm=TD3, num_timesteps=400000)
+actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
 actor.learn()
 # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
 full_sequence = actor.generate_observation(
@@ -34,5 +30,5 @@ full_sequence = actor.generate_observation(
 )
 
 partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-8-v0").str_to_goal(), 0.5)
-print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 8")
+closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
+print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
@@ -1,36 +1,28 @@
 
 from stable_baselines3 import SAC, TD3
-from gr_libs.environment.utils.format import maze_str_to_goal
+from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
 
-# Consider extracting all these to "default point_maze (or every other domain) variables" module which would simplify things like the problem_list_to_str_tuple function, sizes of inputs, etc.
 recognizer = ExpertBasedGraml(
-    env_name="point_maze", # TODO change to macros which are importable from some info or env module of enums.
-    problems=[("PointMaze-FourRoomsEnvDense-11x11-Goal-9x1"),
-              ("PointMaze-FourRoomsEnv-11x11-Goal-9x9"), # this one doesn't work with dense rewards because of encountering local minima
-              ("PointMaze-FourRoomsEnvDense-11x11-Goal-1x9"),
-              ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x3"),
-              ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x4"),
-              ("PointMaze-FourRoomsEnvDense-11x11-Goal-8x2"),
-              ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"),
-              ("PointMaze-FourRoomsEnvDense-11x11-Goal-2x8")],
-    task_str_to_goal=maze_str_to_goal,
-    method=DeepRLAgent,
-    collect_statistics=False,
-    train_configs=[(SAC, 200000) for _ in range(8)],
+    domain_name=POINT_MAZE,
+    env_name="PointMaze-FourRoomsEnvDense-11x11"
 )
-recognizer.domain_learning_phase()
+
+recognizer.domain_learning_phase(
+    [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
+    [(SAC, 200000) for _ in range(8)]
+)
+
 recognizer.goals_adaptation_phase(
-    dynamic_goals_problems = ["PointMaze-FourRoomsEnvDense-11x11-Goal-4x4",
-                              "PointMaze-FourRoomsEnvDense-11x11-Goal-7x3",
-                              "PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"],
-    dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
+    dynamic_goals = [(4,4), (7,3), (3,7)],
+    dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
 )
+
 # TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = DeepRLAgent(env_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
+actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
 actor.learn()
 # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
 full_sequence = actor.generate_observation(
@@ -39,5 +31,5 @@ full_sequence = actor.generate_observation(
 )
 
 partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
-closest_goal = recognizer.inference_phase(partial_sequence, maze_str_to_goal("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4"), 0.5)
+closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
 print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")