gr-libs 0.1.7.post0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gr_libs/__init__.py +4 -1
- gr_libs/_evaluation/__init__.py +1 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +260 -0
- gr_libs/_evaluation/_generate_experiments_results.py +141 -0
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +497 -0
- gr_libs/_evaluation/_get_plans_images.py +61 -0
- gr_libs/_evaluation/_increasing_and_decreasing_.py +106 -0
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +294 -0
- gr_libs/environment/__init__.py +30 -9
- gr_libs/environment/_utils/utils.py +27 -0
- gr_libs/environment/environment.py +417 -54
- gr_libs/metrics/__init__.py +7 -0
- gr_libs/metrics/metrics.py +231 -54
- gr_libs/ml/__init__.py +2 -5
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +3 -1
- gr_libs/ml/base/rl_agent.py +81 -13
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/__init__.py +1 -3
- gr_libs/ml/neural/deep_rl_learner.py +619 -378
- gr_libs/ml/neural/utils/__init__.py +1 -2
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +1 -1
- gr_libs/ml/planner/mcts/{utils → _utils}/node.py +11 -7
- gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +15 -11
- gr_libs/ml/planner/mcts/mcts_model.py +571 -312
- gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs/ml/sequential/_lstm_model.py +270 -0
- gr_libs/ml/tabular/__init__.py +1 -3
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +150 -82
- gr_libs/ml/tabular/tabular_rl_agent.py +42 -28
- gr_libs/ml/utils/__init__.py +2 -3
- gr_libs/ml/utils/format.py +28 -97
- gr_libs/ml/utils/math.py +5 -3
- gr_libs/ml/utils/other.py +3 -3
- gr_libs/ml/utils/storage.py +88 -81
- gr_libs/odgr_executor.py +268 -0
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/_utils/__init__.py +0 -0
- gr_libs/recognizer/_utils/format.py +18 -0
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +233 -88
- gr_libs/recognizer/graml/_gr_dataset.py +233 -0
- gr_libs/recognizer/graml/graml_recognizer.py +586 -252
- gr_libs/recognizer/recognizer.py +90 -30
- gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- gr_libs/tutorials/gcdraco_panda_tutorial.py +62 -0
- gr_libs/tutorials/gcdraco_parking_tutorial.py +57 -0
- gr_libs/tutorials/graml_minigrid_tutorial.py +64 -0
- gr_libs/tutorials/graml_panda_tutorial.py +57 -0
- gr_libs/tutorials/graml_parking_tutorial.py +52 -0
- gr_libs/tutorials/graml_point_maze_tutorial.py +60 -0
- gr_libs/tutorials/graql_minigrid_tutorial.py +50 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
- gr_libs-0.2.2.dist-info/RECORD +71 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
- gr_libs-0.2.2.dist-info/top_level.txt +2 -0
- tests/test_draco.py +14 -0
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +12 -8
- tests/test_graql.py +3 -2
- evaluation/analyze_results_cross_alg_cross_domain.py +0 -277
- evaluation/create_minigrid_map_image.py +0 -34
- evaluation/file_system.py +0 -42
- evaluation/generate_experiments_results.py +0 -92
- evaluation/generate_experiments_results_new_ver1.py +0 -254
- evaluation/generate_experiments_results_new_ver2.py +0 -331
- evaluation/generate_task_specific_statistics_plots.py +0 -272
- evaluation/get_plans_images.py +0 -47
- evaluation/increasing_and_decreasing_.py +0 -63
- gr_libs/environment/utils/utils.py +0 -17
- gr_libs/ml/neural/utils/penv.py +0 -57
- gr_libs/ml/sequential/lstm_model.py +0 -192
- gr_libs/recognizer/graml/gr_dataset.py +0 -134
- gr_libs/recognizer/utils/__init__.py +0 -1
- gr_libs/recognizer/utils/format.py +0 -13
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- gr_libs-0.1.7.post0.dist-info/top_level.txt +0 -4
- tutorials/graml_minigrid_tutorial.py +0 -34
- tutorials/graml_panda_tutorial.py +0 -41
- tutorials/graml_parking_tutorial.py +0 -39
- tutorials/graml_point_maze_tutorial.py +0 -39
- tutorials/graql_minigrid_tutorial.py +0 -34
- /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
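
Most of the churn in this list is a re-layout of the public surface: the old top-level `evaluation/` and `tutorials/` scripts now live inside the package (`gr_libs._evaluation`, `gr_libs.tutorials`), several helper packages gained a leading underscore (`utils` → `_utils`), and new entry points (`gr_libs.all_experiments`, `gr_libs.odgr_executor`) were added. Below is a minimal, hypothetical migration sketch for downstream imports, assuming code previously imported the renamed helpers directly; the new paths are the ones used by the 0.2.2 tutorials further down.

```python
# Hypothetical import migration (paths taken from the file list above).

# gr-libs 0.1.7.post0 (module gr_libs/environment/utils/utils.py, now removed):
# from gr_libs.environment.utils.utils import domain_to_env_property

# gr-libs 0.2.2 (helper packages are underscore-prefixed, signalling private API):
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import MINIGRID

# Recognizers stay importable from the package root, as the new tutorials do:
from gr_libs import Draco, ExpertBasedGraml, GCDraco, GCGraml, Graql

# domain_to_env_property maps a domain constant to its environment-property class:
property_type = domain_to_env_property(MINIGRID)
env_property = property_type("MiniGrid-SimpleCrossingS13N4")
```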
gr_libs/recognizer/recognizer.py
CHANGED
@@ -1,45 +1,105 @@ New version of the module:
```python
from abc import ABC, abstractmethod

from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import SUPPORTED_DOMAINS
from gr_libs.ml.base.rl_agent import RLAgent


class Recognizer(ABC):
    def __init__(
        self,
        domain_name: str,
        env_name: str,
        collect_statistics=False,
        rl_agent_type: type[RLAgent] = None,
        **kwargs,
    ):
        assert domain_name in SUPPORTED_DOMAINS
        self.rl_agent_type = rl_agent_type
        self.domain_name = domain_name
        self.env_prop_type = domain_to_env_property(self.domain_name)
        self.env_prop = self.env_prop_type(env_name)
        self.collect_statistics = collect_statistics

    @abstractmethod
    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
        pass


class LearningRecognizer(Recognizer):
    """
    A class that represents a learning recognizer.

    Inherits from the Recognizer class.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
        """
        Perform the domain learning phase.

        Args:
            base_goals (List[str]): The base goals for the learning phase.
            train_configs (List): The training configurations.

        """
        self.original_train_configs = train_configs


# a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
class GaAgentTrainerRecognizer(Recognizer):
    """
    A class representing a recognizer for GaAgentTrainer.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @abstractmethod
    def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
        """
        Perform the goals adaptation phase.

        Args:
            dynamic_goals (List[str]): The list of dynamic goals.
            dynamic_train_configs: The dynamic training configurations.

        Returns:
            None
        """

    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
        """
        Perform the domain learning phase.

        Args:
            base_goals (List[str]): List of base goals.
            train_configs (List): List of training configurations.

        Returns:
            None
        """
        super().domain_learning_phase(base_goals, train_configs)


class GaAdaptingRecognizer(Recognizer):
    """
    A recognizer that doesn't require more training given a set of new goals, hence it doesn't receive train configs in the goal adaptation phase.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @abstractmethod
    def goals_adaptation_phase(self, dynamic_goals: list[str]):
        """
        Perform the goals adaptation phase.

        Args:
            dynamic_goals (List[str]): A list of dynamic goals to be adapted.

        Returns:
            None
        """
```
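
The hierarchy above defines `inference_phase` as abstract on every recognizer and adds `goals_adaptation_phase` to the two goal-adaptation variants. As a hedged illustration of the 0.2.2 signatures only, the class below is hypothetical and not part of the package:

```python
from gr_libs.recognizer.recognizer import GaAdaptingRecognizer


class CountingRecognizer(GaAdaptingRecognizer):
    """Hypothetical example: scores candidate goals by naive substring counting.
    It only demonstrates the new 0.2.2 method signatures, not a real algorithm."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._goals = []

    def goals_adaptation_phase(self, dynamic_goals: list[str]):
        # A GaAdaptingRecognizer needs no extra training, so just store the goals.
        self._goals = list(dynamic_goals)

    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
        # Pick the stored goal mentioned most often in the observed steps.
        counts = {
            goal: sum(str(goal) in str(step) for step in inf_sequence)
            for goal in self._goals
        }
        return max(counts, key=counts.get)


# Construction mirrors the built-in recognizers, e.g.:
# recognizer = CountingRecognizer(domain_name="minigrid", env_name="MiniGrid-SimpleCrossingS13N4")
```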
gr_libs/tutorials/draco_panda_tutorial.py
ADDED
```python
import numpy as np
from stable_baselines3 import PPO

from gr_libs import Draco
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import PANDA
from gr_libs.metrics import mean_wasserstein_distance
from gr_libs.metrics.metrics import stochastic_amplified_selection
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
from gr_libs.ml.utils.format import random_subset_with_order


def run_draco_panda_tutorial():
    recognizer = Draco(
        domain_name=PANDA,
        env_name="PandaMyReachDense",
        evaluation_function=mean_wasserstein_distance,  # or mean_p_value
    )

    recognizer.goals_adaptation_phase(
        dynamic_goals=[
            np.array([[-0.1, -0.1, 0.1]]),
            np.array([[-0.1, 0.1, 0.1]]),
            np.array([[0.2, 0.2, 0.1]]),
        ],
        dynamic_train_configs=[(PPO, 200000), (PPO, 200000), (PPO, 200000)],
    )
    # TD3 is different from recognizer and expert algorithms, which are SAC #
    property_type = domain_to_env_property(PANDA)
    env_property = property_type("PandaMyReachDense")
    problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
    actor = DeepRLAgent(
        domain_name=PANDA,
        problem_name=problem_name,
        env_prop=env_property,
        algorithm=PPO,
        num_timesteps=400000,
    )
    actor.learn()
    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
        with_dict=True,
    )
    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
    )
    closest_goal = recognizer.inference_phase(
        partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
    )
    print(
        f"closest_goal returned by DRACO: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
    )


if __name__ == "__main__":
    run_draco_panda_tutorial()
```
gr_libs/tutorials/draco_parking_tutorial.py
ADDED
```python
from stable_baselines3 import SAC, TD3

from gr_libs import Draco
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import PARKING, ParkingProperty
from gr_libs.metrics import mean_wasserstein_distance
from gr_libs.metrics.metrics import stochastic_amplified_selection
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
from gr_libs.ml.utils.format import random_subset_with_order


def run_draco_parking_tutorial():
    recognizer = Draco(
        domain_name=PARKING,
        env_name="Parking-S-14-PC-",
        evaluation_function=mean_wasserstein_distance,  # or mean_p_value
    )

    recognizer.goals_adaptation_phase(
        dynamic_goals=["1", "11", "21"],
        dynamic_train_configs=[(SAC, 200000), (SAC, 200000), (SAC, 200000)],
    )

    property_type = domain_to_env_property(PARKING)
    env_property = property_type("Parking-S-14-PC-")
    # TD3 is different from recognizer and expert algorithms, which are SAC #
    actor = DeepRLAgent(
        domain_name="parking",
        problem_name="Parking-S-14-PC--GI-11-v0",
        env_prop=env_property,
        algorithm=TD3,
        num_timesteps=400000,
    )
    actor.learn()

    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
        with_dict=True,
    )

    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
    )
    closest_goal = recognizer.inference_phase(
        partial_sequence,
        ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
        0.5,
    )
    print(
        f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: 11"
    )


if __name__ == "__main__":
    run_draco_parking_tutorial()
```
gr_libs/tutorials/gcdraco_panda_tutorial.py
ADDED
```python
import numpy as np
from stable_baselines3 import PPO, SAC

from gr_libs import GCDraco
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import PANDA, PandaProperty
from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
from gr_libs.ml.utils.format import random_subset_with_order


def run_gcdraco_panda_tutorial():
    recognizer = GCDraco(
        domain_name=PANDA,
        env_name="PandaMyReachDense",
        evaluation_function=mean_wasserstein_distance,  # or mean_p_value
    )

    recognizer.domain_learning_phase(
        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(30)],
        train_configs=[(SAC, 800000)],
    )

    recognizer.goals_adaptation_phase(
        dynamic_goals=[
            np.array([[-0.1, -0.1, 0.1]]),
            np.array([[-0.1, 0.1, 0.1]]),
            np.array([[0.2, 0.2, 0.1]]),
        ],
    )

    # TD3 is different from recognizer and expert algorithms, which are SAC #
    property_type = domain_to_env_property(PANDA)
    env_property = property_type("PandaMyReachDense")
    problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
    actor = DeepRLAgent(
        domain_name=PANDA,
        problem_name=problem_name,
        env_prop=env_property,
        algorithm=PPO,
        num_timesteps=400000,
    )
    actor.learn()
    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
        with_dict=True,
    )
    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
    )
    closest_goal = recognizer.inference_phase(
        partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
    )
    print(
        f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
    )


if __name__ == "__main__":
    run_gcdraco_panda_tutorial()
```
gr_libs/tutorials/gcdraco_parking_tutorial.py
ADDED
```python
from stable_baselines3 import PPO, TD3

from gr_libs import GCDraco
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import PARKING, ParkingProperty
from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
from gr_libs.ml.utils.format import random_subset_with_order


def run_gcdraco_parking_tutorial():
    recognizer = GCDraco(
        domain_name=PARKING,
        env_name="Parking-S-14-PC-",
        evaluation_function=mean_wasserstein_distance,  # or mean_p_value
    )

    recognizer.domain_learning_phase([i for i in range(1, 21)], [(PPO, 200000)])
    recognizer.goals_adaptation_phase(
        dynamic_goals=["1", "11", "21"]
        # no need for expert sequence generation since GCRL is used
    )

    property_type = domain_to_env_property(PARKING)
    env_property = property_type("Parking-S-14-PC-")

    # TD3 is different from recognizer and expert algorithms, which are SAC #
    actor = DeepRLAgent(
        domain_name="parking",
        problem_name="Parking-S-14-PC--GI-11-v0",
        env_prop=env_property,
        algorithm=TD3,
        num_timesteps=400000,
    )
    actor.learn()
    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
        with_dict=True,
    )

    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
    )
    closest_goal = recognizer.inference_phase(
        partial_sequence,
        ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
        0.5,
    )
    print(
        f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: 11"
    )


if __name__ == "__main__":
    run_gcdraco_parking_tutorial()
```
gr_libs/tutorials/graml_minigrid_tutorial.py
ADDED
```python
from gr_libs import ExpertBasedGraml
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import MINIGRID, QLEARNING
from gr_libs.metrics.metrics import stochastic_amplified_selection
from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
from gr_libs.ml.utils.format import random_subset_with_order


def run_graml_minigrid_tutorial():
    recognizer = ExpertBasedGraml(
        domain_name=MINIGRID, env_name="MiniGrid-SimpleCrossingS13N4"
    )

    recognizer.domain_learning_phase(
        base_goals=[
            (11, 1),
            (11, 11),
            (1, 11),
            (7, 11),
            (8, 1),
            (10, 6),
            (6, 9),
            (11, 3),
            (11, 5),
        ],
        train_configs=[(QLEARNING, 100000) for _ in range(9)],
    )

    recognizer.goals_adaptation_phase(
        dynamic_goals=[(11, 1), (11, 11), (1, 11)],
        dynamic_train_configs=[
            (QLEARNING, 100000) for _ in range(3)
        ],  # for expert sequence generation.
    )

    property_type = domain_to_env_property(MINIGRID)
    env_property = property_type("MiniGrid-SimpleCrossingS13N4")

    # TD3 is different from recognizer and expert algorithms, which are SAC #
    actor = TabularQLearner(
        domain_name="minigrid",
        problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0",
        env_prop=env_property,
        algorithm=QLEARNING,
        num_timesteps=100000,
    )
    actor.learn()
    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
    )

    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
    )
    closest_goal = recognizer.inference_phase(partial_sequence, (11, 1), 0.5)
    print(
        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)"
    )


if __name__ == "__main__":
    run_graml_minigrid_tutorial()
```
gr_libs/tutorials/graml_panda_tutorial.py
ADDED
```python
import numpy as np
from stable_baselines3 import PPO, SAC

from gr_libs import GCGraml
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import PANDA, PandaProperty
from gr_libs.metrics.metrics import stochastic_amplified_selection
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
from gr_libs.ml.utils.format import random_subset_with_order


def run_graml_panda_tutorial():
    recognizer = GCGraml(  # TODO make these tutorials into pytests
        domain_name=PANDA, env_name="PandaMyReachDense"
    )
    recognizer.domain_learning_phase(
        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)],
        train_configs=[(SAC, 800000)],
    )
    recognizer.goals_adaptation_phase(
        dynamic_goals=[
            np.array([[-0.1, -0.1, 0.1]]),
            np.array([[-0.1, 0.1, 0.1]]),
            np.array([[0.2, 0.2, 0.1]]),
        ]
    )
    # TD3 is different from recognizer and expert algorithms, which are SAC #
    property_type = domain_to_env_property(PANDA)
    env_property = property_type("PandaMyReachDense")
    problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
    actor = DeepRLAgent(
        domain_name=PANDA,
        problem_name=problem_name,
        env_prop=env_property,
        algorithm=PPO,
        num_timesteps=400000,
    )
    actor.learn()
    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
    )

    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
    )
    closest_goal = recognizer.inference_phase(
        partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
    )
    print(
        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
    )


if __name__ == "__main__":
    run_graml_panda_tutorial()
```
gr_libs/tutorials/graml_parking_tutorial.py
ADDED
```python
from stable_baselines3 import PPO, TD3

from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import PARKING, ParkingProperty
from gr_libs.metrics.metrics import stochastic_amplified_selection
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
from gr_libs.ml.utils.format import random_subset_with_order
from gr_libs.recognizer.graml.graml_recognizer import GCGraml


def run_graml_parking_tutorial():
    recognizer = GCGraml(domain_name=PARKING, env_name="Parking-S-14-PC-")

    recognizer.domain_learning_phase([i for i in range(1, 21)], [(PPO, 200000)])
    recognizer.goals_adaptation_phase(
        dynamic_goals=["1", "11", "21"]
        # no need for expert sequence generation since GCRL is used
    )

    property_type = domain_to_env_property(PARKING)
    env_property = property_type("Parking-S-14-PC-")

    # TD3 is different from recognizer and expert algorithms, which are SAC #
    actor = DeepRLAgent(
        domain_name="parking",
        problem_name="Parking-S-14-PC--GI-11-v0",
        env_prop=env_property,
        algorithm=TD3,
        num_timesteps=400000,
    )
    actor.learn()
    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
    )

    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
    )
    closest_goal = recognizer.inference_phase(
        partial_sequence,
        ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
        0.5,
    )
    print(
        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11"
    )


if __name__ == "__main__":
    run_graml_parking_tutorial()
```
gr_libs/tutorials/graml_point_maze_tutorial.py
ADDED
```python
from stable_baselines3 import SAC, TD3

from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
from gr_libs.metrics.metrics import stochastic_amplified_selection
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
from gr_libs.ml.utils.format import random_subset_with_order
from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml


def run_graml_point_maze_tutorial():
    recognizer = ExpertBasedGraml(
        domain_name=POINT_MAZE, env_name="PointMaze-FourRoomsEnvDense-11x11"
    )

    recognizer.domain_learning_phase(
        [(9, 1), (9, 9), (1, 9), (3, 3), (3, 4), (8, 2), (3, 7), (2, 8)],
        [(SAC, 200000) for _ in range(8)],
    )

    recognizer.goals_adaptation_phase(
        dynamic_goals=[(4, 4), (7, 3), (3, 7)],
        dynamic_train_configs=[
            (SAC, 200000) for _ in range(3)
        ],  # for expert sequence generation.
    )

    property_type = domain_to_env_property(POINT_MAZE)
    env_property = property_type("PointMaze-FourRoomsEnvDense-11x11")

    # TD3 is different from recognizer and expert algorithms, which are SAC #
    actor = DeepRLAgent(
        domain_name="point_maze",
        problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4",
        env_prop=env_property,
        algorithm=TD3,
        num_timesteps=200000,
    )
    actor.learn()
    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
    )

    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence))
    )
    closest_goal = recognizer.inference_phase(
        partial_sequence,
        PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(),
        0.5,
    )
    print(
        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)"
    )


if __name__ == "__main__":
    run_graml_point_maze_tutorial()
```
gr_libs/tutorials/graql_minigrid_tutorial.py
ADDED
```python
from gr_libs import Graql
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.environment.environment import MINIGRID, QLEARNING
from gr_libs.metrics.metrics import stochastic_amplified_selection
from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
from gr_libs.ml.utils.format import random_subset_with_order


def run_graql_minigrid_tutorial():
    recognizer = Graql(domain_name="minigrid", env_name="MiniGrid-SimpleCrossingS13N4")

    # Graql doesn't have a domain learning phase, so we skip it

    recognizer.goals_adaptation_phase(
        dynamic_goals=[(11, 1), (11, 11), (1, 11)],
        dynamic_train_configs=[
            (QLEARNING, 100000) for _ in range(3)
        ],  # for expert sequence generation.
    )

    property_type = domain_to_env_property(MINIGRID)
    env_property = property_type("MiniGrid-SimpleCrossingS13N4")

    # TD3 is different from recognizer and expert algorithms, which are SAC #
    actor = TabularQLearner(
        domain_name="minigrid",
        problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0",
        env_prop=env_property,
        algorithm=QLEARNING,
        num_timesteps=100000,
    )
    actor.learn()
    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
    full_sequence = actor.generate_observation(
        action_selection_method=stochastic_amplified_selection,
        random_optimalism=True,  # the noise that's added to the actions
    )

    partial_sequence = random_subset_with_order(
        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
    )
    closest_goal = recognizer.inference_phase(partial_sequence, (11, 1), 0.5)
    print(
        f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)"
    )
    return closest_goal, (11, 1)


if __name__ == "__main__":
    run_graql_minigrid_tutorial()
```
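
The `return closest_goal, (11, 1)` at the end of this tutorial, the `# TODO make these tutorials into pytests` note in graml_panda_tutorial.py, and the new tests/test_*.py entries in the file list suggest the tutorials double as smoke tests. A hypothetical wrapper is sketched below; the actual contents of tests/test_graql.py are not shown in this diff.

```python
# Hypothetical smoke test; not the package's actual tests/test_graql.py.
from gr_libs.tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial


def test_graql_minigrid_recognizes_true_goal():
    # The tutorial returns both the recognized goal and the goal the actor pursued.
    closest_goal, true_goal = run_graql_minigrid_tutorial()
    assert closest_goal == true_goal
```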