gr-libs 0.1.7.post0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (86)
  1. gr_libs/__init__.py +4 -1
  2. gr_libs/_evaluation/__init__.py +1 -0
  3. gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +260 -0
  4. gr_libs/_evaluation/_generate_experiments_results.py +141 -0
  5. gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +497 -0
  6. gr_libs/_evaluation/_get_plans_images.py +61 -0
  7. gr_libs/_evaluation/_increasing_and_decreasing_.py +106 -0
  8. gr_libs/_version.py +2 -2
  9. gr_libs/all_experiments.py +294 -0
  10. gr_libs/environment/__init__.py +30 -9
  11. gr_libs/environment/_utils/utils.py +27 -0
  12. gr_libs/environment/environment.py +417 -54
  13. gr_libs/metrics/__init__.py +7 -0
  14. gr_libs/metrics/metrics.py +231 -54
  15. gr_libs/ml/__init__.py +2 -5
  16. gr_libs/ml/agent.py +21 -6
  17. gr_libs/ml/base/__init__.py +3 -1
  18. gr_libs/ml/base/rl_agent.py +81 -13
  19. gr_libs/ml/consts.py +1 -1
  20. gr_libs/ml/neural/__init__.py +1 -3
  21. gr_libs/ml/neural/deep_rl_learner.py +619 -378
  22. gr_libs/ml/neural/utils/__init__.py +1 -2
  23. gr_libs/ml/neural/utils/dictlist.py +3 -3
  24. gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +1 -1
  25. gr_libs/ml/planner/mcts/{utils → _utils}/node.py +11 -7
  26. gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +15 -11
  27. gr_libs/ml/planner/mcts/mcts_model.py +571 -312
  28. gr_libs/ml/sequential/__init__.py +0 -1
  29. gr_libs/ml/sequential/_lstm_model.py +270 -0
  30. gr_libs/ml/tabular/__init__.py +1 -3
  31. gr_libs/ml/tabular/state.py +7 -7
  32. gr_libs/ml/tabular/tabular_q_learner.py +150 -82
  33. gr_libs/ml/tabular/tabular_rl_agent.py +42 -28
  34. gr_libs/ml/utils/__init__.py +2 -3
  35. gr_libs/ml/utils/format.py +28 -97
  36. gr_libs/ml/utils/math.py +5 -3
  37. gr_libs/ml/utils/other.py +3 -3
  38. gr_libs/ml/utils/storage.py +88 -81
  39. gr_libs/odgr_executor.py +268 -0
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/_utils/__init__.py +0 -0
  42. gr_libs/recognizer/_utils/format.py +18 -0
  43. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +233 -88
  44. gr_libs/recognizer/graml/_gr_dataset.py +233 -0
  45. gr_libs/recognizer/graml/graml_recognizer.py +586 -252
  46. gr_libs/recognizer/recognizer.py +90 -30
  47. gr_libs/tutorials/draco_panda_tutorial.py +58 -0
  48. gr_libs/tutorials/draco_parking_tutorial.py +56 -0
  49. gr_libs/tutorials/gcdraco_panda_tutorial.py +62 -0
  50. gr_libs/tutorials/gcdraco_parking_tutorial.py +57 -0
  51. gr_libs/tutorials/graml_minigrid_tutorial.py +64 -0
  52. gr_libs/tutorials/graml_panda_tutorial.py +57 -0
  53. gr_libs/tutorials/graml_parking_tutorial.py +52 -0
  54. gr_libs/tutorials/graml_point_maze_tutorial.py +60 -0
  55. gr_libs/tutorials/graql_minigrid_tutorial.py +50 -0
  56. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
  57. gr_libs-0.2.2.dist-info/RECORD +71 -0
  58. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
  59. gr_libs-0.2.2.dist-info/top_level.txt +2 -0
  60. tests/test_draco.py +14 -0
  61. tests/test_gcdraco.py +10 -0
  62. tests/test_graml.py +12 -8
  63. tests/test_graql.py +3 -2
  64. evaluation/analyze_results_cross_alg_cross_domain.py +0 -277
  65. evaluation/create_minigrid_map_image.py +0 -34
  66. evaluation/file_system.py +0 -42
  67. evaluation/generate_experiments_results.py +0 -92
  68. evaluation/generate_experiments_results_new_ver1.py +0 -254
  69. evaluation/generate_experiments_results_new_ver2.py +0 -331
  70. evaluation/generate_task_specific_statistics_plots.py +0 -272
  71. evaluation/get_plans_images.py +0 -47
  72. evaluation/increasing_and_decreasing_.py +0 -63
  73. gr_libs/environment/utils/utils.py +0 -17
  74. gr_libs/ml/neural/utils/penv.py +0 -57
  75. gr_libs/ml/sequential/lstm_model.py +0 -192
  76. gr_libs/recognizer/graml/gr_dataset.py +0 -134
  77. gr_libs/recognizer/utils/__init__.py +0 -1
  78. gr_libs/recognizer/utils/format.py +0 -13
  79. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  80. gr_libs-0.1.7.post0.dist-info/top_level.txt +0 -4
  81. tutorials/graml_minigrid_tutorial.py +0 -34
  82. tutorials/graml_panda_tutorial.py +0 -41
  83. tutorials/graml_parking_tutorial.py +0 -39
  84. tutorials/graml_point_maze_tutorial.py +0 -39
  85. tutorials/graql_minigrid_tutorial.py +0 -34
  86. /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
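Several of the renames in the list above change public import paths: internal helpers move under underscore-prefixed packages (for example gr_libs/environment/utils → gr_libs/environment/_utils), the top-level evaluation/ and tutorials/ directories are folded into the package as gr_libs/_evaluation/ and gr_libs/tutorials/, and gr_libs/odgr_executor.py and gr_libs/all_experiments.py are new. The following is a minimal sketch of what that means for user code, assuming gr_libs 0.2.2 is installed; the new paths are taken from the files added in this diff, the commented-out old ones from the files removed, and whether each symbol is re-exported elsewhere is not shown here.

# Before 0.2.2 (paths removed in this release):
# from gr_libs.environment.utils.utils import domain_to_env_property
# from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial

# As of 0.2.2, the same symbols live inside the package:
from gr_libs.environment._utils.utils import domain_to_env_property
from gr_libs.tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial

# Calling the packaged tutorial runs the full MiniGrid GRAML example end to end
# (it trains agents, so it is slow); importing it is enough to check the path.
if __name__ == "__main__":
    run_graml_minigrid_tutorial()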
gr_libs/recognizer/recognizer.py
@@ -1,45 +1,105 @@
  from abc import ABC, abstractmethod
- from typing import List, Type
- from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
- from gr_libs.environment.utils.utils import domain_to_env_property
+
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import SUPPORTED_DOMAINS
  from gr_libs.ml.base.rl_agent import RLAgent

+
  class Recognizer(ABC):
-     def __init__(self, domain_name: str, env_name:str, collect_statistics=False, rl_agent_type: Type[RLAgent]=None):
-         assert domain_name in SUPPORTED_DOMAINS
-         self.rl_agent_type = rl_agent_type
-         self.domain_name = domain_name
-         self.env_prop_type = domain_to_env_property(self.domain_name)
-         self.env_prop = self.env_prop_type(env_name)
-         self.collect_statistics = collect_statistics
-
-     @abstractmethod
-     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
-         pass
+     def __init__(
+         self,
+         domain_name: str,
+         env_name: str,
+         collect_statistics=False,
+         rl_agent_type: type[RLAgent] = None,
+         **kwargs,
+     ):
+         assert domain_name in SUPPORTED_DOMAINS
+         self.rl_agent_type = rl_agent_type
+         self.domain_name = domain_name
+         self.env_prop_type = domain_to_env_property(self.domain_name)
+         self.env_prop = self.env_prop_type(env_name)
+         self.collect_statistics = collect_statistics
+
+     @abstractmethod
+     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+         pass
+

  class LearningRecognizer(Recognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
+     """
+     A class that represents a learning recognizer.
+
+     Inherits from the Recognizer class.
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     def domain_learning_phase(self, base_goals: list[str], train_configs: list):
+         """
+         Perform the domain learning phase.
+
+         Args:
+             base_goals (List[str]): The base goals for the learning phase.
+             train_configs (List): The training configurations.
+
+         """
+         self.original_train_configs = train_configs

-     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-         self.original_train_configs = train_configs

  # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
  class GaAgentTrainerRecognizer(Recognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
+     """
+     A class representing a recognizer for GaAgentTrainer.
+     """

-     @abstractmethod
-     def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
-         pass
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     @abstractmethod
+     def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
+         """
+         Perform the goals adaptation phase.
+
+         Args:
+             dynamic_goals (List[str]): The list of dynamic goals.
+             dynamic_train_configs: The dynamic training configurations.
+
+         Returns:
+             None
+         """
+
+     def domain_learning_phase(self, base_goals: list[str], train_configs: list):
+         """
+         Perform the domain learning phase.
+
+         Args:
+             base_goals (List[str]): List of base goals.
+             train_configs (List): List of training configurations.
+
+         Returns:
+             None
+         """
+         super().domain_learning_phase(base_goals, train_configs)

-     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-         super().domain_learning_phase(base_goals, train_configs)

  class GaAdaptingRecognizer(Recognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
+     """
+     A recognizer that doesn't require more training given a set of new goals, hence it doesn't receive train configs in the goal adaptation phase.
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     @abstractmethod
+     def goals_adaptation_phase(self, dynamic_goals: list[str]):
+         """
+         Perform the goals adaptation phase.
+
+         Args:
+             dynamic_goals (List[str]): A list of dynamic goals to be adapted.

-     @abstractmethod
-     def goals_adaptation_phase(self, dynamic_goals: List[str]):
-         pass
+         Returns:
+             None
+         """
gr_libs/tutorials/draco_panda_tutorial.py
@@ -0,0 +1,58 @@
+ import numpy as np
+ from stable_baselines3 import PPO
+
+ from gr_libs import Draco
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import PANDA
+ from gr_libs.metrics import mean_wasserstein_distance
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+
+
+ def run_draco_panda_tutorial():
+     recognizer = Draco(
+         domain_name=PANDA,
+         env_name="PandaMyReachDense",
+         evaluation_function=mean_wasserstein_distance,  # or mean_p_value
+     )
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=[
+             np.array([[-0.1, -0.1, 0.1]]),
+             np.array([[-0.1, 0.1, 0.1]]),
+             np.array([[0.2, 0.2, 0.1]]),
+         ],
+         dynamic_train_configs=[(PPO, 200000), (PPO, 200000), (PPO, 200000)],
+     )
+     # the actor is trained with PPO, the same algorithm used for the recognizer's goal agents #
+     property_type = domain_to_env_property(PANDA)
+     env_property = property_type("PandaMyReachDense")
+     problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+     actor = DeepRLAgent(
+         domain_name=PANDA,
+         problem_name=problem_name,
+         env_prop=env_property,
+         algorithm=PPO,
+         num_timesteps=400000,
+     )
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+         with_dict=True,
+     )
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+     )
+     closest_goal = recognizer.inference_phase(
+         partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
+     )
+     print(
+         f"closest_goal returned by DRACO: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
+     )
+
+
+ if __name__ == "__main__":
+     run_draco_panda_tutorial()
gr_libs/tutorials/draco_parking_tutorial.py
@@ -0,0 +1,56 @@
+ from stable_baselines3 import SAC, TD3
+
+ from gr_libs import Draco
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import PARKING, ParkingProperty
+ from gr_libs.metrics import mean_wasserstein_distance
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+
+
+ def run_draco_parking_tutorial():
+     recognizer = Draco(
+         domain_name=PARKING,
+         env_name="Parking-S-14-PC-",
+         evaluation_function=mean_wasserstein_distance,  # or mean_p_value
+     )
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=["1", "11", "21"],
+         dynamic_train_configs=[(SAC, 200000), (SAC, 200000), (SAC, 200000)],
+     )
+
+     property_type = domain_to_env_property(PARKING)
+     env_property = property_type("Parking-S-14-PC-")
+     # TD3 is different from recognizer and expert algorithms, which are SAC #
+     actor = DeepRLAgent(
+         domain_name="parking",
+         problem_name="Parking-S-14-PC--GI-11-v0",
+         env_prop=env_property,
+         algorithm=TD3,
+         num_timesteps=400000,
+     )
+     actor.learn()
+
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+         with_dict=True,
+     )
+
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+     )
+     closest_goal = recognizer.inference_phase(
+         partial_sequence,
+         ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
+         0.5,
+     )
+     print(
+         f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: 11"
+     )
+
+
+ if __name__ == "__main__":
+     run_draco_parking_tutorial()
gr_libs/tutorials/gcdraco_panda_tutorial.py
@@ -0,0 +1,62 @@
+ import numpy as np
+ from stable_baselines3 import PPO, SAC
+
+ from gr_libs import GCDraco
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import PANDA, PandaProperty
+ from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+
+
+ def run_gcdraco_panda_tutorial():
+     recognizer = GCDraco(
+         domain_name=PANDA,
+         env_name="PandaMyReachDense",
+         evaluation_function=mean_wasserstein_distance,  # or mean_p_value
+     )
+
+     recognizer.domain_learning_phase(
+         base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(30)],
+         train_configs=[(SAC, 800000)],
+     )
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=[
+             np.array([[-0.1, -0.1, 0.1]]),
+             np.array([[-0.1, 0.1, 0.1]]),
+             np.array([[0.2, 0.2, 0.1]]),
+         ],
+     )
+
+     # the actor's PPO is different from the recognizer and expert algorithm, which is SAC #
+     property_type = domain_to_env_property(PANDA)
+     env_property = property_type("PandaMyReachDense")
+     problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+     actor = DeepRLAgent(
+         domain_name=PANDA,
+         problem_name=problem_name,
+         env_prop=env_property,
+         algorithm=PPO,
+         num_timesteps=400000,
+     )
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+         with_dict=True,
+     )
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+     )
+     closest_goal = recognizer.inference_phase(
+         partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
+     )
+     print(
+         f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
+     )
+
+
+ if __name__ == "__main__":
+     run_gcdraco_panda_tutorial()
gr_libs/tutorials/gcdraco_parking_tutorial.py
@@ -0,0 +1,57 @@
+ from stable_baselines3 import PPO, TD3
+
+ from gr_libs import GCDraco
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import PARKING, ParkingProperty
+ from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+
+
+ def run_gcdraco_parking_tutorial():
+     recognizer = GCDraco(
+         domain_name=PARKING,
+         env_name="Parking-S-14-PC-",
+         evaluation_function=mean_wasserstein_distance,  # or mean_p_value
+     )
+
+     recognizer.domain_learning_phase([i for i in range(1, 21)], [(PPO, 200000)])
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=["1", "11", "21"]
+         # no need for expert sequence generation since GCRL is used
+     )
+
+     property_type = domain_to_env_property(PARKING)
+     env_property = property_type("Parking-S-14-PC-")
+
+     # the actor's TD3 is different from the recognizer and expert algorithm, which is PPO #
+     actor = DeepRLAgent(
+         domain_name="parking",
+         problem_name="Parking-S-14-PC--GI-11-v0",
+         env_prop=env_property,
+         algorithm=TD3,
+         num_timesteps=400000,
+     )
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+         with_dict=True,
+     )
+
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+     )
+     closest_goal = recognizer.inference_phase(
+         partial_sequence,
+         ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
+         0.5,
+     )
+     print(
+         f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: 11"
+     )
+
+
+ if __name__ == "__main__":
+     run_gcdraco_parking_tutorial()
gr_libs/tutorials/graml_minigrid_tutorial.py
@@ -0,0 +1,64 @@
+ from gr_libs import ExpertBasedGraml
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import MINIGRID, QLEARNING
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+ from gr_libs.ml.utils.format import random_subset_with_order
+
+
+ def run_graml_minigrid_tutorial():
+     recognizer = ExpertBasedGraml(
+         domain_name=MINIGRID, env_name="MiniGrid-SimpleCrossingS13N4"
+     )
+
+     recognizer.domain_learning_phase(
+         base_goals=[
+             (11, 1),
+             (11, 11),
+             (1, 11),
+             (7, 11),
+             (8, 1),
+             (10, 6),
+             (6, 9),
+             (11, 3),
+             (11, 5),
+         ],
+         train_configs=[(QLEARNING, 100000) for _ in range(9)],
+     )
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=[(11, 1), (11, 11), (1, 11)],
+         dynamic_train_configs=[
+             (QLEARNING, 100000) for _ in range(3)
+         ],  # for expert sequence generation.
+     )
+
+     property_type = domain_to_env_property(MINIGRID)
+     env_property = property_type("MiniGrid-SimpleCrossingS13N4")
+
+     # the actor, like the recognizer's experts, is a tabular Q-learning agent #
+     actor = TabularQLearner(
+         domain_name="minigrid",
+         problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0",
+         env_prop=env_property,
+         algorithm=QLEARNING,
+         num_timesteps=100000,
+     )
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+     )
+     closest_goal = recognizer.inference_phase(partial_sequence, (11, 1), 0.5)
+     print(
+         f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)"
+     )
+
+
+ if __name__ == "__main__":
+     run_graml_minigrid_tutorial()
gr_libs/tutorials/graml_panda_tutorial.py
@@ -0,0 +1,57 @@
+ import numpy as np
+ from stable_baselines3 import PPO, SAC
+
+ from gr_libs import GCGraml
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import PANDA, PandaProperty
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+
+
+ def run_graml_panda_tutorial():
+     recognizer = GCGraml(  # TODO make these tutorials into pytests
+         domain_name=PANDA, env_name="PandaMyReachDense"
+     )
+     recognizer.domain_learning_phase(
+         base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)],
+         train_configs=[(SAC, 800000)],
+     )
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=[
+             np.array([[-0.1, -0.1, 0.1]]),
+             np.array([[-0.1, 0.1, 0.1]]),
+             np.array([[0.2, 0.2, 0.1]]),
+         ]
+     )
+     # the actor's PPO is different from the recognizer and expert algorithm, which is SAC #
+     property_type = domain_to_env_property(PANDA)
+     env_property = property_type("PandaMyReachDense")
+     problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+     actor = DeepRLAgent(
+         domain_name=PANDA,
+         problem_name=problem_name,
+         env_prop=env_property,
+         algorithm=PPO,
+         num_timesteps=400000,
+     )
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+     )
+     closest_goal = recognizer.inference_phase(
+         partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
+     )
+     print(
+         f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
+     )
+
+
+ if __name__ == "__main__":
+     run_graml_panda_tutorial()
gr_libs/tutorials/graml_parking_tutorial.py
@@ -0,0 +1,52 @@
+ from stable_baselines3 import PPO, TD3
+
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import PARKING, ParkingProperty
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs.recognizer.graml.graml_recognizer import GCGraml
+
+
+ def run_graml_parking_tutorial():
+     recognizer = GCGraml(domain_name=PARKING, env_name="Parking-S-14-PC-")
+
+     recognizer.domain_learning_phase([i for i in range(1, 21)], [(PPO, 200000)])
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=["1", "11", "21"]
+         # no need for expert sequence generation since GCRL is used
+     )
+
+     property_type = domain_to_env_property(PARKING)
+     env_property = property_type("Parking-S-14-PC-")
+
+     # the actor's TD3 is different from the recognizer and expert algorithm, which is PPO #
+     actor = DeepRLAgent(
+         domain_name="parking",
+         problem_name="Parking-S-14-PC--GI-11-v0",
+         env_prop=env_property,
+         algorithm=TD3,
+         num_timesteps=400000,
+     )
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+     )
+     closest_goal = recognizer.inference_phase(
+         partial_sequence,
+         ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
+         0.5,
+     )
+     print(
+         f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11"
+     )
+
+
+ if __name__ == "__main__":
+     run_graml_parking_tutorial()
gr_libs/tutorials/graml_point_maze_tutorial.py
@@ -0,0 +1,60 @@
+ from stable_baselines3 import SAC, TD3
+
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
+
+
+ def run_graml_point_maze_tutorial():
+     recognizer = ExpertBasedGraml(
+         domain_name=POINT_MAZE, env_name="PointMaze-FourRoomsEnvDense-11x11"
+     )
+
+     recognizer.domain_learning_phase(
+         [(9, 1), (9, 9), (1, 9), (3, 3), (3, 4), (8, 2), (3, 7), (2, 8)],
+         [(SAC, 200000) for _ in range(8)],
+     )
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=[(4, 4), (7, 3), (3, 7)],
+         dynamic_train_configs=[
+             (SAC, 200000) for _ in range(3)
+         ],  # for expert sequence generation.
+     )
+
+     property_type = domain_to_env_property(POINT_MAZE)
+     env_property = property_type("PointMaze-FourRoomsEnvDense-11x11")
+
+     # TD3 is different from recognizer and expert algorithms, which are SAC #
+     actor = DeepRLAgent(
+         domain_name="point_maze",
+         problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4",
+         env_prop=env_property,
+         algorithm=TD3,
+         num_timesteps=200000,
+     )
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence))
+     )
+     closest_goal = recognizer.inference_phase(
+         partial_sequence,
+         PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(),
+         0.5,
+     )
+     print(
+         f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)"
+     )
+
+
+ if __name__ == "__main__":
+     run_graml_point_maze_tutorial()
gr_libs/tutorials/graql_minigrid_tutorial.py
@@ -0,0 +1,50 @@
+ from gr_libs import Graql
+ from gr_libs.environment._utils.utils import domain_to_env_property
+ from gr_libs.environment.environment import MINIGRID, QLEARNING
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+ from gr_libs.ml.utils.format import random_subset_with_order
+
+
+ def run_graql_minigrid_tutorial():
+     recognizer = Graql(domain_name="minigrid", env_name="MiniGrid-SimpleCrossingS13N4")
+
+     # Graql doesn't have a domain learning phase, so we skip it
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=[(11, 1), (11, 11), (1, 11)],
+         dynamic_train_configs=[
+             (QLEARNING, 100000) for _ in range(3)
+         ],  # for expert sequence generation.
+     )
+
+     property_type = domain_to_env_property(MINIGRID)
+     env_property = property_type("MiniGrid-SimpleCrossingS13N4")
+
+     # the actor, like the recognizer's goal agents, is a tabular Q-learning agent #
+     actor = TabularQLearner(
+         domain_name="minigrid",
+         problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0",
+         env_prop=env_property,
+         algorithm=QLEARNING,
+         num_timesteps=100000,
+     )
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True,  # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(
+         full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+     )
+     closest_goal = recognizer.inference_phase(partial_sequence, (11, 1), 0.5)
+     print(
+         f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)"
+     )
+     return closest_goal, (11, 1)
+
+
+ if __name__ == "__main__":
+     run_graql_minigrid_tutorial()