gr-libs 0.1.4__py3-none-any.whl → 0.1.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -103,7 +103,6 @@ class Graml(LearningRecognizer):
  self.plans_dict[f"{true_goal}_true"] = true_sequence

  with open(embeddings_path + f'/{true_goal}_{percentage}_plans_dict.pkl', 'wb') as plans_file:
- # TODO erase AGENT_BASED macros
  to_dump = {}
  for goal, obss in self.plans_dict.items():
  if goal == f"{true_goal}_true":
@@ -243,7 +242,7 @@ class GCGraml(Graml, GaAdaptingRecognizer):
  if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
  gc_agent = self.rl_agent_type(**kwargs)
  gc_agent.learn()
- self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent)) # TODO change
+ self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))

  def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
  problem_name = self.env_prop.goal_to_problem_str(goal)
@@ -1,6 +1,5 @@
  from abc import ABC, abstractmethod
  from typing import List, Type
-
  from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
  from gr_libs.environment.utils.utils import domain_to_env_property
  from gr_libs.ml.base.rl_agent import RLAgent
@@ -18,7 +17,7 @@ class Recognizer(ABC):
  def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
  pass

- class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+ class LearningRecognizer(Recognizer):
  def __init__(self, *args, **kwargs):
  super().__init__(*args, **kwargs)

@@ -26,7 +25,7 @@ class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheri
  self.original_train_configs = train_configs

  # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
- class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+ class GaAgentTrainerRecognizer(Recognizer):
  def __init__(self, *args, **kwargs):
  super().__init__(*args, **kwargs)

@@ -37,7 +36,7 @@ class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the
  def domain_learning_phase(self, base_goals: List[str], train_configs: List):
  super().domain_learning_phase(base_goals, train_configs)

- class GaAdaptingRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+ class GaAdaptingRecognizer(Recognizer):
  def __init__(self, *args, **kwargs):
  super().__init__(*args, **kwargs)

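The three hunks above strip stale TODOs from the small class hierarchy in gr_libs/recognizer/recognizer.py (one of the removed notes asked for a class diagram of it). A minimal sketch of the inheritance those hunks imply, assuming nothing beyond what the diff shows (method bodies, decorators, and constructor arguments omitted):

```python
# Sketch only: the relationships implied by the hunks above, not the package source.
from abc import ABC

class Recognizer(ABC):
    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
        pass

class LearningRecognizer(Recognizer):        # learns from base goals during the domain learning phase
    pass

class GaAgentTrainerRecognizer(Recognizer):  # trains an agent for every new goal during goal adaptation
    pass

class GaAdaptingRecognizer(Recognizer):      # presumably adapts to new goals without per-goal training
    pass
```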
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gr_libs
- Version: 0.1.4
+ Version: 0.1.6.post1
  Summary: Package with goal recognition frameworks baselines
  Author: Ben Nageris
  Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -17,6 +17,7 @@ Requires-Dist: torchvision
  Requires-Dist: rl_zoo3
  Requires-Dist: stable_baselines3[extra]
  Requires-Dist: sb3_contrib
+ Requires-Dist: pytest
  Provides-Extra: minigrid
  Requires-Dist: gr_envs[minigrid]; extra == "minigrid"
  Provides-Extra: highway
@@ -111,6 +112,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin

  Tutorials demonstrating basic ODGR scenarios are available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.

+ ## Working with an initial dataset of trained agents
+ gr_libs also includes a library of trained agents for the various supported environments within the package.
+ To get the dataset of trained agents, you can run:
+ ```sh
+ python download_dataset.py
+ ```
+
+ An alternative is to use our Docker image, which already includes the dataset.
+ You can:
+ 1. Pull the image:
+ ```sh
+ docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+ ```
+ 2. Run a container:
+ ```sh
+ docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+ ```
+ 3. Install the package from within the container (go back to 'Setup' for the instructions).
+
  ### Method 1: Writing a Custom Script

  1. **Create a recognizer**
@@ -118,6 +138,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
  Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.

  ```python
+ import gr_libs.environment # Triggers gym env registration - you must run it!
  recognizer = Graql(
  domain_name="minigrid",
  env_name="MiniGrid-SimpleCrossingS13N4"
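The added import line is load-bearing: per its own comment, gr_libs registers its custom Gym environments as a side effect of importing gr_libs.environment, so it must run before any of the tutorial environment ids can be constructed. A hedged sketch of that dependency, assuming the Gymnasium API used by recent stable_baselines3 releases and reusing an environment id that appears in the tutorials below:

```python
# Hedged illustration, not package code: without the side-effect import below,
# constructing one of the gr_libs environment ids is expected to fail with an
# "environment not registered" error.
import gr_libs.environment  # registers the custom envs on import (per the README comment)
import gymnasium as gym

# Environment id taken from the minigrid tutorials in this diff.
env = gym.make("MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0")
env.reset()
```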
@@ -1,3 +1,5 @@
+ CI/README.md,sha256=CbWNAWrXFFwYq3sWAORhoQIE5busoNyYh_rFWVH1enw,800
+ CI/docker_build_context/Dockerfile,sha256=Rk7LYTxOW7VVJcmNa8csZ4BwkunMYIiHX4WVSuMam50,311
  evaluation/analyze_results_cross_alg_cross_domain.py,sha256=s_DDh4rNfRnvQ0PDa2d5411jYOa7CaI1YeB8Dpup7QU,9803
  evaluation/create_minigrid_map_image.py,sha256=jaSW3n3tY222iFUeAMqedBP9cvD88GCzPrQ6_XHv5oQ,1242
  evaluation/file_system.py,sha256=SSYnj8QGFkq-8V_0s7x2MWbD88aFaoFY4Ogc_Pt8m6U,1601
@@ -8,19 +10,20 @@ evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icw
  evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
  evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
  gr_libs/__init__.py,sha256=-uKsQiHIL7yojbDwlTR-I8sj1WX9XT52PoFbPjtUTKo,145
- gr_libs/environment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/environment/environment.py,sha256=0-3kZJCmLMF9o0NignZaMEwQb94NZQ2gmsOyfjPXKDI,6919
+ gr_libs/_version.py,sha256=C8Me-BH17Mqlv65Ba3Tqc5gFEzabp8fxxyIA9C_XdDQ,517
+ gr_libs/environment/__init__.py,sha256=HFVGBcufWf8-ahCo6h_s2pFEyvDy59cFg8z908RgdYo,1038
+ gr_libs/environment/environment.py,sha256=d6ZbiAQ4H1aLrUFI8sm0BN9DVW3JtzpkodSi_70Z_PY,6780
  gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
  gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/metrics/metrics.py,sha256=IYPL90tuxbTRaNLaFvgzd5SMUwS8gsSANuWZcSVuKkM,8737
+ gr_libs/metrics/metrics.py,sha256=4bnvs5suv-QrK9i1NuOzkE_E8uIzS1nlEazNDRXvZGs,8700
  gr_libs/ml/__init__.py,sha256=jrjxYqvSRgWwFWw7XQP9DzOwvmprMZ2umwT7t-DYtDU,233
  gr_libs/ml/agent.py,sha256=DSnK8nRx9SS76fAOZZEEvA68_meLjzm9lfQpMUXmGQU,1957
  gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
  gr_libs/ml/base/__init__.py,sha256=MfIYhl_UqH8H7YoTCih8wBFA_gpTOUFq8Ph0_Nq0XQk,68
  gr_libs/ml/base/rl_agent.py,sha256=u9rnb-ma9iDM5b_BlwjcTJGSFezIGrxXINw6b-Dbl8s,1598
  gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
- gr_libs/ml/neural/deep_rl_learner.py,sha256=_d6LdbMPqN4qJlOI_UqSD7o0yzIa7EjRONdFSVYO_Ag,20677
+ gr_libs/ml/neural/deep_rl_learner.py,sha256=b41_b4GVlYqxhjrr1_YMcGdU9iwcMXsf3zH8D2kEucs,20659
  gr_libs/ml/neural/utils/__init__.py,sha256=bJgPfRnmfDQxdnb0OyRGwzgebEc1PnlO7-GpqszPBcc,106
  gr_libs/ml/neural/utils/dictlist.py,sha256=WpHfdWpVZ_T3PcSnOQUC--ro_tsS0dvam2WG3LcsHDw,1039
  gr_libs/ml/neural/utils/penv.py,sha256=R1uW8sePQqvTlJjpAuMx16eDU6TuGAjQF3hTR1QasMo,1862
@@ -42,22 +45,26 @@ gr_libs/ml/utils/format.py,sha256=nu7RzVwn_raG_fqqmnqlJgUjtA0yzKztkB3a5QZnRYo,30
  gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
  gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
  gr_libs/ml/utils/storage.py,sha256=oCdvL_ypCglnSJsyyXzNyV_UJASTfioa3yJhFlFso64,4277
+ gr_libs/problems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/problems/consts.py,sha256=ON7yfKTAKETg7i3okDYuOzEU7KWvynyubl0m7TlU6Hs,38808
  gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/recognizer/recognizer.py,sha256=ysJYOGe5OlERMAeMwclKpwqw2tQvbSvGnLZrq4qP0xk,1895
+ gr_libs/recognizer/recognizer.py,sha256=ZrApJVdBQxKRYhhDiWLCNGmlxgi674nwgb30BgVggC8,1705
  gr_libs/recognizer/recognizer_doc.md,sha256=RnTvbZhl2opvU7-QT4pULCV5HCdJTw2dsu8WQOOiR3E,2521
  gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
  gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
- gr_libs/recognizer/graml/graml_recognizer.py,sha256=OblODjvWSi8KZ5ByvGyL5Mm1GY3IZb8yspfk81Dbebg,15721
+ gr_libs/recognizer/graml/graml_recognizer.py,sha256=SGs7rtkA73lbCv9HISa6dfjVUJUhlH54QriVsoGVRss,15672
  gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
  gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
- tutorials/graml_minigrid_tutorial.py,sha256=iLs7mOYNAZ5wtxtSwiE8tvbLMIueQ5VmVmDnBBjWG_4,1589
- tutorials/graml_panda_tutorial.py,sha256=DuHVDLe49qwgWouLxwalqdT1P4dlNOOMdgDc3ocNX5Y,1820
- tutorials/graml_parking_tutorial.py,sha256=sQ496DNuAo9GZ_0iUZ_6Hqe5zFxIYZ_pBIHQscQvR4o,2501
- tutorials/graml_point_maze_tutorial.py,sha256=TnLT9FdDj6AF8lm0lDIZum4ouPE5rye4RBH8z4Exj2Y,2713
- tutorials/graql_minigrid_tutorial.py,sha256=VoXbEgL_hjQLfau6WohXxPK8rrv1VLA874F8PZ7ZtPk,1421
- gr_libs-0.1.4.dist-info/METADATA,sha256=wH7aEvKh4kRTXBs75uPSBW87s23dgcsKRqgkVSwKDQc,8905
- gr_libs-0.1.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- gr_libs-0.1.4.dist-info/top_level.txt,sha256=k7_l789QSJEr9JrtvsRMxNoTIDwNduq8mhIN-YoPJUM,29
- gr_libs-0.1.4.dist-info/RECORD,,
+ tests/test_graml.py,sha256=ZJB2jqtf4Q2-KZredkJq90teqmHBIvigCAQpvR5G110,559
+ tests/test_graql.py,sha256=-onMi13e2wStOmB5bYv2f3Ita3QFFiw416XMBkby0OI,141
+ tutorials/graml_minigrid_tutorial.py,sha256=ONvxFi79R7d8dcd6gy083Z_yy9A2flhGTDIDRxurdx8,1782
+ tutorials/graml_panda_tutorial.py,sha256=wtv_lsw0vsU7j45GKeWecTfE7jzfh4iVGEVnQyaWthM,2063
+ tutorials/graml_parking_tutorial.py,sha256=46-sfxmYA9jLRSpqIF9z69MLSfOSTJarfjlQ_Igq294,1769
+ tutorials/graml_point_maze_tutorial.py,sha256=mYq3IxYbf9jidq-4VdT3MdStV80Q5lytFv6Xzzn22Ys,1835
+ tutorials/graql_minigrid_tutorial.py,sha256=Jb0TCUhiZQkFeafJWUTPnCISd4FKfPrqP-xfHiqCGKE,1635
+ gr_libs-0.1.6.post1.dist-info/METADATA,sha256=UPwlwVlbGTpTsUhYwWH5hYr-hSBpcWjrFIA7sWg0Kj4,9620
+ gr_libs-0.1.6.post1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ gr_libs-0.1.6.post1.dist-info/top_level.txt,sha256=rL-bbK-KnLzVbLIUCdN1riH58lup3jG0NJ3LTt_qSwo,38
+ gr_libs-0.1.6.post1.dist-info/RECORD,,
@@ -1,3 +1,5 @@
+ CI
  evaluation
  gr_libs
+ tests
  tutorials
tests/test_graml.py ADDED
@@ -0,0 +1,16 @@
+ from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial
+ from tutorials.graml_panda_tutorial import run_graml_panda_tutorial
+ from tutorials.graml_parking_tutorial import run_graml_parking_tutorial
+ from tutorials.graml_point_maze_tutorial import run_graml_point_maze_tutorial
+
+ def test_graml_minigrid_tutorial():
+ run_graml_minigrid_tutorial()
+
+ def test_graml_panda_tutorial():
+ run_graml_panda_tutorial()
+
+ def test_graml_parking_tutorial():
+ run_graml_parking_tutorial()
+
+ def test_graml_point_maze_tutorial():
+ run_graml_point_maze_tutorial()
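The new tests are thin wrappers that simply invoke each tutorial's `run_*` entry point, so pytest (now declared via Requires-Dist) discovers them by the `test_` prefix. A minimal sketch of driving a single one of them programmatically, assuming the package, its environment extras, and the trained-agent dataset are installed as described in the README section above:

```python
# Hedged sketch, not part of the package: run only the minigrid GRAML test.
# "-s" keeps the tutorial's print output visible; "-k" filters by test name.
import sys
import pytest

if __name__ == "__main__":
    sys.exit(pytest.main(["-s", "-k", "test_graml_minigrid_tutorial", "tests/test_graml.py"]))
```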
tests/test_graql.py ADDED
@@ -0,0 +1,4 @@
+ from tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial
+
+ def test_graql_minigrid_tutorial():
+ run_graql_minigrid_tutorial()
@@ -1,30 +1,34 @@
- from gr_libs.environment.environment import QLEARNING
+ from gr_libs.environment.environment import MINIGRID, QLEARNING
  from gr_libs.metrics.metrics import stochastic_amplified_selection
  from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs import ExpertBasedGraml

- recognizer = ExpertBasedGraml(
- domain_name="minigrid",
- env_name="MiniGrid-SimpleCrossingS13N4"
- )
+ def run_graml_minigrid_tutorial():
+ recognizer = ExpertBasedGraml(
+ domain_name=MINIGRID,
+ env_name="MiniGrid-SimpleCrossingS13N4"
+ )

- recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
- train_configs=[(QLEARNING, 100000) for _ in range(9)])
+ recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
+ train_configs=[(QLEARNING, 100000) for _ in range(9)])

- recognizer.goals_adaptation_phase(
- dynamic_goals = [(11,1), (11,11), (1,11)],
- dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals = [(11,1), (11,11), (1,11)],
+ dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+ )
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+
+ if __name__ == "__main__":
+ run_graml_minigrid_tutorial()
@@ -1,32 +1,41 @@

  import numpy as np
  from stable_baselines3 import PPO, SAC
- from gr_libs.environment.environment import PANDA, GCEnvProperty, PandaProperty
+ import gr_libs.environment.environment
+ from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
  from gr_libs.environment.utils.utils import domain_to_env_property
  from gr_libs.metrics.metrics import stochastic_amplified_selection
  from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs import GCGraml

- recognizer = GCGraml( # TODO make these tutorials into pytests
- domain_name=PANDA,
- env_name="PandaMyReachDense"
- )
- recognizer.domain_learning_phase(base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
- train_configs=[(SAC, 800000)])
- recognizer.goals_adaptation_phase(dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])])
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- property_type = domain_to_env_property(PANDA)
- env_property = property_type("PandaMyReachDense")
- problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
- actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ def run_graml_panda_tutorial():
+ recognizer = GCGraml( # TODO make these tutorials into pytests
+ domain_name=PANDA,
+ env_name="PandaMyReachDense"
+ )
+ recognizer.domain_learning_phase(
+ base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
+ train_configs=[(SAC, 800000)]
+ )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
+ )
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ property_type = domain_to_env_property(PANDA)
+ env_property = property_type("PandaMyReachDense")
+ problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+ actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
+
+ if __name__ == "__main__":
+ run_graml_panda_tutorial()
@@ -1,38 +1,38 @@

  from stable_baselines3 import PPO, SAC, TD3
- from gr_libs.environment.environment import EnvProperty, GCEnvProperty, ParkingProperty
+ from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
  from gr_libs.metrics.metrics import stochastic_amplified_selection
  from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml

- # Consider extracting all these to "default point_maze (or every other domain) variables" module which would simplify things like the problem_list_to_str_tuple function, sizes of inputs, etc.
- recognizer = GCGraml(
- env_name="parking", # TODO change to macros which are importable from some info or env module of enums.
- problems=[ParkingProperty("parking-v0")],
- train_configs=[(PPO, 400000)],
- gc_goal_set=[f"Parking-S-14-PC--GI-{i}-v0" for i in range(1,21)]
- )
- recognizer.domain_learning_phase()
- recognizer.goals_adaptation_phase(
- dynamic_goals_problems = [ParkingProperty(p) for p in ["Parking-S-14-PC--GI-1-v0",
- "Parking-S-14-PC--GI-4-v0",
- "Parking-S-14-PC--GI-8-v0",
- "Parking-S-14-PC--GI-11-v0",
- "Parking-S-14-PC--GI-14-v0",
- "Parking-S-14-PC--GI-18-v0",
- "Parking-S-14-PC--GI-21-v0"]] # TODO detach the goal from the environment instance in every gym env, add the ability to alter it from outside.
- #dynamic_train_configs=[(SAC, 400000) for _ in range(7)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = DeepRLAgent(env_name="parking", problem_name="Parking-S-14-PC--GI-8-v0", algorithm=TD3, num_timesteps=400000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ def run_graml_parking_tutorial():
+ recognizer = GCGraml(
+ domain_name=PARKING,
+ env_name="Parking-S-14-PC-"
+ )
+
+ recognizer.domain_learning_phase(
+ [i for i in range(1,21)],
+ [(PPO, 200000)]
+ )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals = ["1", "11", "21"]
+ # no need for expert sequence generation since GCRL is used
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-8-v0").str_to_goal(), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 8")
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )
+
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
+
+ if __name__ == "__main__":
+ run_graml_parking_tutorial()
@@ -1,43 +1,39 @@

  from stable_baselines3 import SAC, TD3
- from gr_libs.environment.utils.format import maze_str_to_goal
+ from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
  from gr_libs.metrics.metrics import stochastic_amplified_selection
  from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml

- # Consider extracting all these to "default point_maze (or every other domain) variables" module which would simplify things like the problem_list_to_str_tuple function, sizes of inputs, etc.
- recognizer = ExpertBasedGraml(
- env_name="point_maze", # TODO change to macros which are importable from some info or env module of enums.
- problems=[("PointMaze-FourRoomsEnvDense-11x11-Goal-9x1"),
- ("PointMaze-FourRoomsEnv-11x11-Goal-9x9"), # this one doesn't work with dense rewards because of encountering local minima
- ("PointMaze-FourRoomsEnvDense-11x11-Goal-1x9"),
- ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x3"),
- ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x4"),
- ("PointMaze-FourRoomsEnvDense-11x11-Goal-8x2"),
- ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"),
- ("PointMaze-FourRoomsEnvDense-11x11-Goal-2x8")],
- task_str_to_goal=maze_str_to_goal,
- method=DeepRLAgent,
- collect_statistics=False,
- train_configs=[(SAC, 200000) for _ in range(8)],
- )
- recognizer.domain_learning_phase()
- recognizer.goals_adaptation_phase(
- dynamic_goals_problems = ["PointMaze-FourRoomsEnvDense-11x11-Goal-4x4",
- "PointMaze-FourRoomsEnvDense-11x11-Goal-7x3",
- "PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"],
- dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = DeepRLAgent(env_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ def run_graml_point_maze_tutorial():
+ recognizer = ExpertBasedGraml(
+ domain_name=POINT_MAZE,
+ env_name="PointMaze-FourRoomsEnvDense-11x11"
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
- closest_goal = recognizer.inference_phase(partial_sequence, maze_str_to_goal("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4"), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
+ recognizer.domain_learning_phase(
+ [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
+ [(SAC, 200000) for _ in range(8)]
+ )
+
+ recognizer.goals_adaptation_phase(
+ dynamic_goals = [(4,4), (7,3), (3,7)],
+ dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
+ )
+
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )
+
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
+ closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
+
+ if __name__ == "__main__":
+ run_graml_point_maze_tutorial()
@@ -4,26 +4,31 @@ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs import Graql

- recognizer = Graql(
- domain_name="minigrid",
- env_name="MiniGrid-SimpleCrossingS13N4"
- )
+ def run_graql_minigrid_tutorial():
+ recognizer = Graql(
+ domain_name="minigrid",
+ env_name="MiniGrid-SimpleCrossingS13N4"
+ )

- #Graql doesn't have a domain learning phase, so we skip it
+ #Graql doesn't have a domain learning phase, so we skip it

- recognizer.goals_adaptation_phase(
- dynamic_goals = [(11,1), (11,11), (1,11)],
- dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals = [(11,1), (11,11), (1,11)],
+ dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+ )
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
- print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+ print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+ return closest_goal, (11,1)
+
+ if __name__ == "__main__":
+ run_graql_minigrid_tutorial()
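Note that run_graql_minigrid_tutorial now returns the recognized goal together with the true goal, which makes a stricter check possible. A hedged sketch of such an assertion (the shipped tests/test_graql.py only calls the function; this variant is illustrative, not part of the package):

```python
# Illustrative only: assert that Graql recovers the goal the actor aimed for.
from tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial

def test_graql_minigrid_recognizes_true_goal():
    closest_goal, true_goal = run_graql_minigrid_tutorial()
    assert closest_goal == true_goal
```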