gr-libs 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gr_libs/environment/__init__.py +22 -0
- gr_libs/environment/environment.py +0 -2
- gr_libs/ml/neural/deep_rl_learner.py +10 -12
- gr_libs/recognizer/graml/graml_recognizer.py +1 -2
- gr_libs/recognizer/recognizer.py +3 -3
- {gr_libs-0.1.4.dist-info → gr_libs-0.1.5.dist-info}/METADATA +1 -1
- {gr_libs-0.1.4.dist-info → gr_libs-0.1.5.dist-info}/RECORD +13 -13
- tutorials/graml_minigrid_tutorial.py +2 -2
- tutorials/graml_panda_tutorial.py +9 -4
- tutorials/graml_parking_tutorial.py +14 -18
- tutorials/graml_point_maze_tutorial.py +14 -22
- {gr_libs-0.1.4.dist-info → gr_libs-0.1.5.dist-info}/WHEEL +0 -0
- {gr_libs-0.1.4.dist-info → gr_libs-0.1.5.dist-info}/top_level.txt +0 -0
gr_libs/environment/__init__.py
CHANGED
@@ -0,0 +1,22 @@
+import importlib.metadata
+import warnings
+
+def is_extra_installed(package: str, extra: str) -> bool:
+    """Check if an extra was installed for a given package."""
+    try:
+        # Get metadata for the installed package
+        dist = importlib.metadata.metadata(package)
+        requires = dist.get_all("Requires-Dist", [])  # Dependencies listed in the package metadata
+        return any(extra in req for req in requires)
+    except importlib.metadata.PackageNotFoundError:
+        return False  # The package is not installed
+
+# Check if `gr_libs[minigrid]` was installed
+for env in ["minigrid", "panda", "parking", "point_maze"]:
+    if is_extra_installed("gr_libs", f"gr_envs[{env}]"):
+        try:
+            importlib.import_module(f"gr_envs.{env}_scripts.envs")
+        except ImportError:
+            raise ImportError(f"gr_libs[{env}] was not installed, but gr_libs[{env}] requires it! if you messed with gr_libs installation, you can reinstall gr_libs.")
+    else:
+        warnings.warn(f"gr_libs[{env}] was not installed, skipping {env} imports.", RuntimeWarning)
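For context on the substring check in the new module: `importlib.metadata.metadata(pkg).get_all("Requires-Dist")` returns the raw requirement strings, and optional dependencies typically appear with an `extra == "..."` marker (for example `gr-envs[minigrid]; extra == "minigrid"`), which is why a plain `in` test is enough here. A minimal, self-contained sketch for inspecting those entries; the package name `gr_libs` comes from the diff, the printed output depends on what is installed locally:

```python
import importlib.metadata

def list_requires(package: str) -> list:
    """Return the raw Requires-Dist entries of an installed package, or [] if absent."""
    try:
        return importlib.metadata.metadata(package).get_all("Requires-Dist") or []
    except importlib.metadata.PackageNotFoundError:
        return []

# Entries for optional dependencies usually carry an extra marker, e.g.
# 'gr-envs[minigrid]; extra == "minigrid"', which the substring check relies on.
for req in list_requires("gr_libs"):
    print(req)
```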
gr_libs/environment/environment.py
CHANGED
@@ -168,8 +168,6 @@ class PandaProperty(GCEnvProperty):
 
 
 class ParkingProperty(GCEnvProperty):
-    # def str_to_goal(self): # TODO not use it, goal is not a part of the env property anymore.
-    #     return self.name.split("-")[-2]
 
     def __init__(self, name):
         super().__init__(name)
gr_libs/ml/neural/deep_rl_learner.py
CHANGED
@@ -13,11 +13,6 @@ if __name__ != "__main__":
 from gr_libs.ml.utils.format import random_subset_with_order
 from stable_baselines3 import SAC, PPO
 from stable_baselines3.common.vec_env import DummyVecEnv
-from gr_envs.custom_env_wrappers.flat_obs_wrapper import CombineAchievedGoalAndObservationWrapper
-
-# important for registration of envs! do not remove lad
-import gr_envs.maze_scripts.envs.maze
-import gr_envs.highway_env_scripts.envs.parking_env
 from gr_libs.ml.utils import device
 
 # built-in python modules
@@ -32,13 +27,15 @@ def create_vec_env(kwargs):
     return DummyVecEnv([lambda: env])
 
 def change_goal_to_specific_desired(obs, desired):
-
-
-
-
-
-
-
+    if desired is not None:
+        obs['desired_goal'] = desired
+    # try:
+    #     if desired!=None: obs['desired_goal'] = desired
+    # except Exception as e:
+    #     try:
+    #         if all(desired!=None): obs['desired_goal'] = desired
+    #     except Exception as e:
+    #         if all([desiredy!=None for desiredish in desired for desiredy in desiredish]): obs['desired_goal'] = desired
 
 
 NETWORK_SETUP = {
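The new body above replaces the old nested try/except (kept only as a comment) with a single None check before overwriting the observation's goal. A minimal usage sketch, assuming the goal-conditioned observation is the usual dict with a 'desired_goal' entry; the array values are illustrative only:

```python
import numpy as np

def change_goal_to_specific_desired(obs, desired):
    # Overwrite the goal only when a specific one was requested.
    if desired is not None:
        obs['desired_goal'] = desired

obs = {"observation": np.zeros(6), "achieved_goal": np.zeros(3), "desired_goal": np.zeros(3)}
change_goal_to_specific_desired(obs, np.array([-0.1, -0.1, 0.1]))  # goal replaced in place
change_goal_to_specific_desired(obs, None)                         # no-op
print(obs["desired_goal"])
```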
@@ -265,6 +262,7 @@ class DeepRLAgent():
             assert fig_path == None, "You can't specify a vid path when you don't even save the figure."
         else:
             assert fig_path != None, "You need to specify a vid path when you save the figure."
+        # The try-except is a bug fix for the env not being reset properly in panda. If someone wants to check why and provide a robust solution they're welcome.
         try:
             obs = self.env.reset()
             change_goal_to_specific_desired(obs, desired)
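The added comment flags that `self.env.reset()` can fail for the panda environments and that the surrounding try/except is a stop-gap. The fallback branch lies outside the shown hunk, so the following is only a generic sketch of a reset-with-retry pattern, under the assumption that recovery means rebuilding the env and resetting again; none of it is the library's actual fix:

```python
from typing import Any, Callable

def reset_with_retry(reset: Callable[[], Any], rebuild: Callable[[], None], retries: int = 1):
    """Sketch: call reset(); if it raises, invoke rebuild() and try again up to `retries` times."""
    for attempt in range(retries + 1):
        try:
            return reset()
        except Exception:
            if attempt == retries:
                raise
            rebuild()  # e.g. recreate the vectorized env before the next attempt

# Hypothetical usage inside generate_observation (names illustrative, not from the diff):
# obs = reset_with_retry(self.env.reset, lambda: setattr(self, "env", create_vec_env(kwargs)))
# change_goal_to_specific_desired(obs, desired)
```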
gr_libs/recognizer/graml/graml_recognizer.py
CHANGED
@@ -103,7 +103,6 @@ class Graml(LearningRecognizer):
             self.plans_dict[f"{true_goal}_true"] = true_sequence
 
         with open(embeddings_path + f'/{true_goal}_{percentage}_plans_dict.pkl', 'wb') as plans_file:
-            # TODO erase AGENT_BASED macros
             to_dump = {}
             for goal, obss in self.plans_dict.items():
                 if goal == f"{true_goal}_true":
@@ -243,7 +242,7 @@ class GCGraml(Graml, GaAdaptingRecognizer):
         if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
         gc_agent = self.rl_agent_type(**kwargs)
         gc_agent.learn()
-        self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))
+        self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))
 
     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
         problem_name = self.env_prop.goal_to_problem_str(goal)
gr_libs/recognizer/recognizer.py
CHANGED
@@ -18,7 +18,7 @@ class Recognizer(ABC):
     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
         pass
 
-class LearningRecognizer(Recognizer):
+class LearningRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
@@ -26,7 +26,7 @@ class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheri
         self.original_train_configs = train_configs
 
 # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
-class GaAgentTrainerRecognizer(Recognizer):
+class GaAgentTrainerRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
@@ -37,7 +37,7 @@ class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the
     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
         super().domain_learning_phase(base_goals, train_configs)
 
-class GaAdaptingRecognizer(Recognizer):
+class GaAdaptingRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
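The distinction these classes draw becomes concrete in the tutorials later in this diff: `ExpertBasedGraml` (a `GaAgentTrainerRecognizer`) must be given `dynamic_train_configs` when new goals arrive, while `GCGraml` (a `GaAdaptingRecognizer`) adapts to new goals without further training. A condensed side-by-side sketch, with goal values and timestep counts copied from those tutorials and the preceding `domain_learning_phase` calls omitted for brevity:

```python
from stable_baselines3 import SAC
from gr_libs.environment.environment import PARKING, POINT_MAZE
from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml

# GaAgentTrainerRecognizer: every new goal needs its own training config
# (used for expert sequence generation during goal adaptation).
expert = ExpertBasedGraml(domain_name=POINT_MAZE, env_name="PointMaze-FourRoomsEnvDense-11x11")
expert.goals_adaptation_phase(
    dynamic_goals=[(4, 4), (7, 3), (3, 7)],
    dynamic_train_configs=[(SAC, 200000) for _ in range(3)],
)

# GaAdaptingRecognizer: the goal-conditioned agent generalizes,
# so new goals need no additional training.
gc = GCGraml(domain_name=PARKING, env_name="Parking-S-14-PC-")
gc.goals_adaptation_phase(dynamic_goals=["1", "11", "21"])
```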
{gr_libs-0.1.4.dist-info → gr_libs-0.1.5.dist-info}/RECORD
CHANGED
@@ -8,8 +8,8 @@ evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icw
 evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
 evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
 gr_libs/__init__.py,sha256=-uKsQiHIL7yojbDwlTR-I8sj1WX9XT52PoFbPjtUTKo,145
-gr_libs/environment/__init__.py,sha256=
-gr_libs/environment/environment.py,sha256=
+gr_libs/environment/__init__.py,sha256=oxEKmdvzQLKbbMxedqEf3bGsSJvp2XL9Bxr5JEO6a5o,1038
+gr_libs/environment/environment.py,sha256=Ca9m6W8KEt4le0HFSAUvSHW5lSHg_wwJaBqFSq1KlUg,6781
 gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
 gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -20,7 +20,7 @@ gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
 gr_libs/ml/base/__init__.py,sha256=MfIYhl_UqH8H7YoTCih8wBFA_gpTOUFq8Ph0_Nq0XQk,68
 gr_libs/ml/base/rl_agent.py,sha256=u9rnb-ma9iDM5b_BlwjcTJGSFezIGrxXINw6b-Dbl8s,1598
 gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
-gr_libs/ml/neural/deep_rl_learner.py,sha256=
+gr_libs/ml/neural/deep_rl_learner.py,sha256=b41_b4GVlYqxhjrr1_YMcGdU9iwcMXsf3zH8D2kEucs,20659
 gr_libs/ml/neural/utils/__init__.py,sha256=bJgPfRnmfDQxdnb0OyRGwzgebEc1PnlO7-GpqszPBcc,106
 gr_libs/ml/neural/utils/dictlist.py,sha256=WpHfdWpVZ_T3PcSnOQUC--ro_tsS0dvam2WG3LcsHDw,1039
 gr_libs/ml/neural/utils/penv.py,sha256=R1uW8sePQqvTlJjpAuMx16eDU6TuGAjQF3hTR1QasMo,1862
@@ -43,21 +43,21 @@ gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
 gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
 gr_libs/ml/utils/storage.py,sha256=oCdvL_ypCglnSJsyyXzNyV_UJASTfioa3yJhFlFso64,4277
 gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/recognizer/recognizer.py,sha256=
+gr_libs/recognizer/recognizer.py,sha256=2lMlJNE7X13Y3FMpNfql-WAYw23NImIiomnVmCI75bM,1706
 gr_libs/recognizer/recognizer_doc.md,sha256=RnTvbZhl2opvU7-QT4pULCV5HCdJTw2dsu8WQOOiR3E,2521
 gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
 gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
-gr_libs/recognizer/graml/graml_recognizer.py,sha256=
+gr_libs/recognizer/graml/graml_recognizer.py,sha256=SGs7rtkA73lbCv9HISa6dfjVUJUhlH54QriVsoGVRss,15672
 gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
 gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
-tutorials/graml_minigrid_tutorial.py,sha256=
-tutorials/graml_panda_tutorial.py,sha256=
-tutorials/graml_parking_tutorial.py,sha256=
-tutorials/graml_point_maze_tutorial.py,sha256=
+tutorials/graml_minigrid_tutorial.py,sha256=0jSlsKd0H3DXA7rPSnw09y56pTSXvtXCFOKSuUvfDjs,1597
+tutorials/graml_panda_tutorial.py,sha256=LwNQPb7Kdg7X8jY7Zk13-8uBfDP8LMNzwgH-u3KOcjw,1861
+tutorials/graml_parking_tutorial.py,sha256=fsLbASIESUGnZe09eEhMcqxpU0NP8k1IQyGgJq_AFVs,1549
+tutorials/graml_point_maze_tutorial.py,sha256=gY8GCHnq32xyY7gSw3i3DL98TlfwgMkhO17csyV2QBA,1631
 tutorials/graql_minigrid_tutorial.py,sha256=VoXbEgL_hjQLfau6WohXxPK8rrv1VLA874F8PZ7ZtPk,1421
-gr_libs-0.1.
-gr_libs-0.1.
-gr_libs-0.1.
-gr_libs-0.1.
+gr_libs-0.1.5.dist-info/METADATA,sha256=h4QUMjuxouD3o1iKg2F3doJbKLbezkV1FaGKm1oBL0o,8905
+gr_libs-0.1.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+gr_libs-0.1.5.dist-info/top_level.txt,sha256=k7_l789QSJEr9JrtvsRMxNoTIDwNduq8mhIN-YoPJUM,29
+gr_libs-0.1.5.dist-info/RECORD,,
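For readers unfamiliar with the RECORD format: each entry is `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64 encoding of the file's SHA-256 hash with trailing `=` padding stripped (the standard wheel convention from PEP 376/PEP 427). A small sketch for recomputing an entry, e.g. to confirm which files actually changed between 0.1.4 and 0.1.5:

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Build a wheel RECORD-style line: path,sha256=<urlsafe b64 digest, no padding>,<size>."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode("ascii")
    return f"{path},sha256={digest},{len(data)}"

# Example: recompute the hash recorded for the rewritten environment __init__.py
print(record_entry("gr_libs/environment/__init__.py"))
```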
tutorials/graml_minigrid_tutorial.py
CHANGED
@@ -1,11 +1,11 @@
-from gr_libs.environment.environment import QLEARNING
+from gr_libs.environment.environment import MINIGRID, QLEARNING
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import ExpertBasedGraml
 
 recognizer = ExpertBasedGraml(
-    domain_name=
+    domain_name=MINIGRID,
     env_name="MiniGrid-SimpleCrossingS13N4"
 )
 
tutorials/graml_panda_tutorial.py
CHANGED
@@ -1,7 +1,8 @@
 
 import numpy as np
 from stable_baselines3 import PPO, SAC
-
+import gr_libs.environment.environment
+from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
@@ -12,9 +13,13 @@ recognizer = GCGraml( # TODO make these tutorials into pytests
     domain_name=PANDA,
     env_name="PandaMyReachDense"
 )
-recognizer.domain_learning_phase(
-
-
+recognizer.domain_learning_phase(
+    base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
+    train_configs=[(SAC, 800000)]
+)
+recognizer.goals_adaptation_phase(
+    dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
+)
 # TD3 is different from recognizer and expert algorithms, which are SAC #
 property_type = domain_to_env_property(PANDA)
 env_property = property_type("PandaMyReachDense")
tutorials/graml_parking_tutorial.py
CHANGED
@@ -1,31 +1,27 @@
 
 from stable_baselines3 import PPO, SAC, TD3
-from gr_libs.environment.environment import EnvProperty, GCEnvProperty, ParkingProperty
+from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
 
-# Consider extracting all these to "default point_maze (or every other domain) variables" module which would simplify things like the problem_list_to_str_tuple function, sizes of inputs, etc.
 recognizer = GCGraml(
-
-
-
-
+    domain_name=PARKING,
+    env_name="Parking-S-14-PC-"
+)
+
+recognizer.domain_learning_phase(
+    [i for i in range(1,21)],
+    [(PPO, 200000)]
 )
-recognizer.domain_learning_phase()
 recognizer.goals_adaptation_phase(
-
-
-        "Parking-S-14-PC--GI-8-v0",
-        "Parking-S-14-PC--GI-11-v0",
-        "Parking-S-14-PC--GI-14-v0",
-        "Parking-S-14-PC--GI-18-v0",
-        "Parking-S-14-PC--GI-21-v0"]] # TODO detach the goal from the environment instance in every gym env, add the ability to alter it from outside.
-    #dynamic_train_configs=[(SAC, 400000) for _ in range(7)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
+    dynamic_goals = ["1", "11", "21"]
+    # no need for expert sequence generation since GCRL is used
 )
+
 # TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = DeepRLAgent(
+actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
 actor.learn()
 # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
 full_sequence = actor.generate_observation(
@@ -34,5 +30,5 @@ full_sequence = actor.generate_observation(
 )
 
 partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-
-print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 
+closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
+print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
tutorials/graml_point_maze_tutorial.py
CHANGED
@@ -1,36 +1,28 @@
 
 from stable_baselines3 import SAC, TD3
-from gr_libs.environment.
+from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
 
-# Consider extracting all these to "default point_maze (or every other domain) variables" module which would simplify things like the problem_list_to_str_tuple function, sizes of inputs, etc.
 recognizer = ExpertBasedGraml(
-
-
-        ("PointMaze-FourRoomsEnv-11x11-Goal-9x9"), # this one doesn't work with dense rewards because of encountering local minima
-        ("PointMaze-FourRoomsEnvDense-11x11-Goal-1x9"),
-        ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x3"),
-        ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x4"),
-        ("PointMaze-FourRoomsEnvDense-11x11-Goal-8x2"),
-        ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"),
-        ("PointMaze-FourRoomsEnvDense-11x11-Goal-2x8")],
-    task_str_to_goal=maze_str_to_goal,
-    method=DeepRLAgent,
-    collect_statistics=False,
-    train_configs=[(SAC, 200000) for _ in range(8)],
+    domain_name=POINT_MAZE,
+    env_name="PointMaze-FourRoomsEnvDense-11x11"
 )
-
+
+recognizer.domain_learning_phase(
+    [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
+    [(SAC, 200000) for _ in range(8)]
+)
+
 recognizer.goals_adaptation_phase(
-
-
-        "PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"],
-    dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
+    dynamic_goals = [(4,4), (7,3), (3,7)],
+    dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
 )
+
 # TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = DeepRLAgent(
+actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
 actor.learn()
 # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
 full_sequence = actor.generate_observation(
@@ -39,5 +31,5 @@ full_sequence = actor.generate_observation(
 )
 
 partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
-closest_goal = recognizer.inference_phase(partial_sequence, 
+closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
 print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
{gr_libs-0.1.4.dist-info → gr_libs-0.1.5.dist-info}/WHEEL
File without changes
{gr_libs-0.1.4.dist-info → gr_libs-0.1.5.dist-info}/top_level.txt
File without changes