gr-libs 0.1.5__py3-none-any.whl → 0.1.7.post0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -82,14 +82,14 @@ class Graml(LearningRecognizer):
  dev_loader=DataLoader(dev_dataset, batch_size=self.env_prop.get_lstm_props().batch_size, shuffle=False, collate_fn=self.collate_func))
  save_weights(model=self.model, path=self.model_file_path)

- def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty]):
+ def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty], save_fig=False):
  self.is_first_inf_since_new_goals = True
  self.current_goals = dynamic_goals
  # start by training each rl agent on the base goal set
  self.embeddings_dict = {} # relevant if the embedding of the plan occurs during the goals adaptation phase
  self.plans_dict = {} # relevant if the embedding of the plan occurs during the inference phase
  for goal in self.current_goals:
- obss = self.generate_sequences_library(goal)
+ obss = self.generate_sequences_library(goal, save_fig=save_fig)
  self.plans_dict[str(goal)] = obss

  def get_goal_plan(self, goal):
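The hunk above threads a new `save_fig` keyword through `goals_adaptation_phase`. As a minimal, hypothetical caller-side sketch (not taken from the package): it assumes `recognizer` is an already-constructed recognizer using the base `Graml.goals_adaptation_phase` shown here, with goals in the minigrid tuple format used by the tutorials later in this diff.

```python
# Hypothetical usage sketch of the new save_fig keyword; `recognizer` and the goal
# tuples are assumptions borrowed from the minigrid tutorial further down this diff.
recognizer.goals_adaptation_phase(
    dynamic_goals=[(11, 1), (11, 11), (1, 11)],
    save_fig=True,  # opt in to writing policy-sequence figures; default is False
)
```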
@@ -150,7 +150,7 @@ class Graml(LearningRecognizer):
  return closest_goal

  @abstractmethod
- def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+ def generate_sequences_library(self, goal: str, save_fig=False) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
  pass

  # this function duplicates every sequence and creates a consecutive and non-consecutive version of it
@@ -192,10 +192,10 @@ class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
  super().__init__(*args, **kwargs)
  if self.rl_agent_type==None: self.rl_agent_type = TabularQLearner

- def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+ def generate_sequences_library(self, goal: str, save_fig=False) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
  problem_name = self.env_prop.goal_to_problem_str(goal)
  img_path = os.path.join(get_policy_sequences_result_path(self.env_prop.domain_name, recognizer=self.__class__.__name__), problem_name + "_MCTS")
- return mcts_model.plan(self.env_prop.name, problem_name, goal, save_fig=True, img_path=img_path, env_prop=self.env_prop)
+ return mcts_model.plan(self.env_prop.name, problem_name, goal, save_fig=save_fig, img_path=img_path, env_prop=self.env_prop)

  class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
  def __init__(self, *args, **kwargs):
@@ -206,15 +206,23 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
  else:
  self.rl_agent_type = DeepRLAgent

- def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+ def generate_sequences_library(self, goal: str, save_fig=False) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
  problem_name = self.env_prop.goal_to_problem_str(goal)
  kwargs = {"domain_name":self.domain_name, "problem_name":problem_name}
  if self.dynamic_train_configs_dict[problem_name][0] != None: kwargs["algorithm"] = self.dynamic_train_configs_dict[problem_name][0]
  if self.dynamic_train_configs_dict[problem_name][1] != None: kwargs["num_timesteps"] = self.dynamic_train_configs_dict[problem_name][1]
  agent = self.rl_agent_type(**kwargs)
  agent.learn()
- fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_bg_sequence")
- return [agent.generate_observation(action_selection_method=metrics.greedy_selection, random_optimalism=False, save_fig=True, fig_path=fig_path, env_prop=self.env_prop)]
+ agent_kwargs = {
+ "action_selection_method": metrics.greedy_selection,
+ "random_optimalism": False,
+ "save_fig": save_fig,
+ "env_prop": self.env_prop
+ }
+ if save_fig:
+ fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_bg_sequence")
+ agent_kwargs["fig_path"] = fig_path
+ return [agent.generate_observation(**agent_kwargs)]

  def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
  self.dynamic_goals_problems = [self.env_prop.goal_to_problem_str(g) for g in dynamic_goals]
@@ -244,20 +252,21 @@ class GCGraml(Graml, GaAdaptingRecognizer):
  gc_agent.learn()
  self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))

- def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+ def generate_sequences_library(self, goal: str, save_fig=False) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
  problem_name = self.env_prop.goal_to_problem_str(goal)
  kwargs = {"domain_name":self.domain_name, "problem_name":self.env_prop.name} # problem name is env name in gc case
  if self.original_train_configs[0][0] != None: kwargs["algorithm"] = self.original_train_configs[0][0]
  if self.original_train_configs[0][1] != None: kwargs["num_timesteps"] = self.original_train_configs[0][1]
  agent = self.rl_agent_type(**kwargs)
  agent.learn()
- fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_gc_sequence")
  agent_kwargs = {
  "action_selection_method": metrics.stochastic_amplified_selection,
  "random_optimalism": True,
- "save_fig": True,
- "fig_path": fig_path
+ "save_fig": save_fig
  }
+ if save_fig:
+ fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_gc_sequence")
+ agent_kwargs["fig_path"] = fig_path
  if self.env_prop.use_goal_directed_problem(): agent_kwargs["goal_directed_problem"] = problem_name
  else: agent_kwargs["goal_directed_goal"] = goal
  obss = []
@@ -1,6 +1,5 @@
  from abc import ABC, abstractmethod
  from typing import List, Type
-
  from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
  from gr_libs.environment.utils.utils import domain_to_env_property
  from gr_libs.ml.base.rl_agent import RLAgent
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gr_libs
- Version: 0.1.5
+ Version: 0.1.7.post0
  Summary: Package with goal recognition frameworks baselines
  Author: Ben Nageris
  Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -17,6 +17,7 @@ Requires-Dist: torchvision
  Requires-Dist: rl_zoo3
  Requires-Dist: stable_baselines3[extra]
  Requires-Dist: sb3_contrib
+ Requires-Dist: pytest
  Provides-Extra: minigrid
  Requires-Dist: gr_envs[minigrid]; extra == "minigrid"
  Provides-Extra: highway
@@ -111,6 +112,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin

  Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.

+ ## Working with an initial dataset of trained agents
+ gr_libs also includes a library of trained agents for the various supported environments within the package.
+ To get the dataset of trained agents, you can run:
+ ```sh
+ python download_dataset.py
+ ```
+
+ An alternative is to use our docker image, which includes the dataset in it.
+ You can:
+ 1. pull the image:
+ ```sh
+ docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+ ```
+ 2. run a container:
+ ```sh
+ docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+ ```
+ 3. don't forget to install the package from within the container, go back to 'Setup' for that.
+

  ### Method 1: Writing a Custom Script

  1. **Create a recognizer**
@@ -118,6 +138,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
  Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.

  ```python
+ import gr_libs.environment # Triggers gym env registration - you must run it!
  recognizer = Graql(
  domain_name="minigrid",
  env_name="MiniGrid-SimpleCrossingS13N4"
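The README excerpt above is cut off by the hunk boundary. Combining it with the graql minigrid tutorial shown later in this diff, the complete snippet would look roughly like the sketch below; treat it as illustrative rather than the exact README text.

```python
# Assembled from the README excerpt above and tutorials/graql_minigrid_tutorial.py
# further down this diff; the import of Graql follows that tutorial.
import gr_libs.environment  # Triggers gym env registration - you must run it!
from gr_libs import Graql

recognizer = Graql(
    domain_name="minigrid",
    env_name="MiniGrid-SimpleCrossingS13N4"
)
```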
@@ -7,13 +7,14 @@ evaluation/generate_experiments_results_new_ver2.py,sha256=jeKj_wgdM50o2vi8WZI-s
  evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icwIg4_dsleFjEH6eiQCcUBj6WU,15286
  evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
  evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
- gr_libs/__init__.py,sha256=-uKsQiHIL7yojbDwlTR-I8sj1WX9XT52PoFbPjtUTKo,145
- gr_libs/environment/__init__.py,sha256=oxEKmdvzQLKbbMxedqEf3bGsSJvp2XL9Bxr5JEO6a5o,1038
- gr_libs/environment/environment.py,sha256=Ca9m6W8KEt4le0HFSAUvSHW5lSHg_wwJaBqFSq1KlUg,6781
+ gr_libs/__init__.py,sha256=WlSRpZIpz5GxLNk96nhympbk3Z5nsMiSOyiAWj17S88,280
+ gr_libs/_version.py,sha256=Zy3HQFB_Viry2Rl81-7LPU4kL2FTQegnwLvl0VxTs3E,526
+ gr_libs/environment/__init__.py,sha256=KlRp3qdgxEmej31zDoDsYPwbcAqyDglx6x0mH0KRmHU,1032
+ gr_libs/environment/environment.py,sha256=d6ZbiAQ4H1aLrUFI8sm0BN9DVW3JtzpkodSi_70Z_PY,6780
  gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
  gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/metrics/metrics.py,sha256=IYPL90tuxbTRaNLaFvgzd5SMUwS8gsSANuWZcSVuKkM,8737
+ gr_libs/metrics/metrics.py,sha256=4bnvs5suv-QrK9i1NuOzkE_E8uIzS1nlEazNDRXvZGs,8700
  gr_libs/ml/__init__.py,sha256=jrjxYqvSRgWwFWw7XQP9DzOwvmprMZ2umwT7t-DYtDU,233
  gr_libs/ml/agent.py,sha256=DSnK8nRx9SS76fAOZZEEvA68_meLjzm9lfQpMUXmGQU,1957
  gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
@@ -34,30 +35,33 @@ gr_libs/ml/sequential/__init__.py,sha256=rusN4ahTvAeAq1Saz6qS_9HEU7WuXDJu2zwhc9W
  gr_libs/ml/sequential/lstm_model.py,sha256=Vzm-C1URR84PGNEecj69GUtn3ZmOVyh1BAY6CUnfL1Q,8978
  gr_libs/ml/tabular/__init__.py,sha256=jAfjfTFZLLlVm1KUiJdxdnaNGFp1J2KBU89q_vvradM,177
  gr_libs/ml/tabular/state.py,sha256=8xroKF3y3nRX0LK1QX5fRT2PS2WmvcDPp0UvPFdSx2A,733
- gr_libs/ml/tabular/tabular_q_learner.py,sha256=q6Dz4RTX0GjBumUiS2mUFKvEiKUBecj0q1RpWvPvmmE,18972
+ gr_libs/ml/tabular/tabular_q_learner.py,sha256=5QU9ZWC-Cu5jxv5K1TohoRjQrRDhCgTs1Mt18cqM_Sc,18970
  gr_libs/ml/tabular/tabular_rl_agent.py,sha256=7w8PYbKi8QgxHJyECWU_rURtT89spg0tHIMI1cDwYc8,3764
  gr_libs/ml/utils/__init__.py,sha256=qH3pcnem5Z6rkQ4RTZi47AXJRe1RkFEST_-DrBmfWcM,258
  gr_libs/ml/utils/env.py,sha256=AWVN0OXYmFU-J3FUiwvEAIY93Suf1oL6VNcxtyWJraM,171
  gr_libs/ml/utils/format.py,sha256=nu7RzVwn_raG_fqqmnqlJgUjtA0yzKztkB3a5QZnRYo,3071
  gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
  gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
- gr_libs/ml/utils/storage.py,sha256=oCdvL_ypCglnSJsyyXzNyV_UJASTfioa3yJhFlFso64,4277
+ gr_libs/ml/utils/storage.py,sha256=52wR2pgFmcCOhqbu5Km3tegjAmtI1Fb4HYAVUnUubZk,4626
+ gr_libs/problems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/problems/consts.py,sha256=ON7yfKTAKETg7i3okDYuOzEU7KWvynyubl0m7TlU6Hs,38808
  gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/recognizer/recognizer.py,sha256=2lMlJNE7X13Y3FMpNfql-WAYw23NImIiomnVmCI75bM,1706
- gr_libs/recognizer/recognizer_doc.md,sha256=RnTvbZhl2opvU7-QT4pULCV5HCdJTw2dsu8WQOOiR3E,2521
+ gr_libs/recognizer/recognizer.py,sha256=ZrApJVdBQxKRYhhDiWLCNGmlxgi674nwgb30BgVggC8,1705
  gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
  gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
- gr_libs/recognizer/graml/graml_recognizer.py,sha256=SGs7rtkA73lbCv9HISa6dfjVUJUhlH54QriVsoGVRss,15672
+ gr_libs/recognizer/graml/graml_recognizer.py,sha256=1xLl1gHj3JxWhHtV9h3SvsW7oJdxsQQV0F-VLtlTmKQ,15911
  gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
  gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
- tutorials/graml_minigrid_tutorial.py,sha256=0jSlsKd0H3DXA7rPSnw09y56pTSXvtXCFOKSuUvfDjs,1597
- tutorials/graml_panda_tutorial.py,sha256=LwNQPb7Kdg7X8jY7Zk13-8uBfDP8LMNzwgH-u3KOcjw,1861
- tutorials/graml_parking_tutorial.py,sha256=fsLbASIESUGnZe09eEhMcqxpU0NP8k1IQyGgJq_AFVs,1549
- tutorials/graml_point_maze_tutorial.py,sha256=gY8GCHnq32xyY7gSw3i3DL98TlfwgMkhO17csyV2QBA,1631
- tutorials/graql_minigrid_tutorial.py,sha256=VoXbEgL_hjQLfau6WohXxPK8rrv1VLA874F8PZ7ZtPk,1421
- gr_libs-0.1.5.dist-info/METADATA,sha256=h4QUMjuxouD3o1iKg2F3doJbKLbezkV1FaGKm1oBL0o,8905
- gr_libs-0.1.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- gr_libs-0.1.5.dist-info/top_level.txt,sha256=k7_l789QSJEr9JrtvsRMxNoTIDwNduq8mhIN-YoPJUM,29
- gr_libs-0.1.5.dist-info/RECORD,,
+ tests/test_graml.py,sha256=ZJB2jqtf4Q2-KZredkJq90teqmHBIvigCAQpvR5G110,559
+ tests/test_graql.py,sha256=-onMi13e2wStOmB5bYv2f3Ita3QFFiw416XMBkby0OI,141
+ tutorials/graml_minigrid_tutorial.py,sha256=ONvxFi79R7d8dcd6gy083Z_yy9A2flhGTDIDRxurdx8,1782
+ tutorials/graml_panda_tutorial.py,sha256=wtv_lsw0vsU7j45GKeWecTfE7jzfh4iVGEVnQyaWthM,2063
+ tutorials/graml_parking_tutorial.py,sha256=M6bt1WQOOgn8_CRyG2kjxF14PMeyXVAWRDq1ZRwGTXo,1808
+ tutorials/graml_point_maze_tutorial.py,sha256=mYq3IxYbf9jidq-4VdT3MdStV80Q5lytFv6Xzzn22Ys,1835
+ tutorials/graql_minigrid_tutorial.py,sha256=Jb0TCUhiZQkFeafJWUTPnCISd4FKfPrqP-xfHiqCGKE,1635
+ gr_libs-0.1.7.post0.dist-info/METADATA,sha256=aS7y9Nl1JErXYdpAHstuQP_W1QMcKMbGet6IfxfJ_Do,9620
+ gr_libs-0.1.7.post0.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
+ gr_libs-0.1.7.post0.dist-info/top_level.txt,sha256=fJQF8Q8Dfh_D3pA2mhNodazNjzW6b3oWfnx6Jdo-pBU,35
+ gr_libs-0.1.7.post0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (78.1.0)
+ Generator: setuptools (80.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
@@ -1,3 +1,4 @@
  evaluation
  gr_libs
+ tests
  tutorials
tests/test_graml.py ADDED
@@ -0,0 +1,16 @@
+ from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial
+ from tutorials.graml_panda_tutorial import run_graml_panda_tutorial
+ from tutorials.graml_parking_tutorial import run_graml_parking_tutorial
+ from tutorials.graml_point_maze_tutorial import run_graml_point_maze_tutorial
+
+ def test_graml_minigrid_tutorial():
+ run_graml_minigrid_tutorial()
+
+ def test_graml_panda_tutorial():
+ run_graml_panda_tutorial()
+
+ def test_graml_parking_tutorial():
+ run_graml_parking_tutorial()
+
+ def test_graml_point_maze_tutorial():
+ run_graml_point_maze_tutorial()
tests/test_graql.py ADDED
@@ -0,0 +1,4 @@
+ from tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial
+
+ def test_graql_minigrid_tutorial():
+ run_graql_minigrid_tutorial()
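Since the new wheel declares `pytest` as a dependency and ships these tests, they can be run with a plain pytest invocation. Below is a hypothetical sketch (the `-q` flag and the programmatic `pytest.main` call are standard pytest usage, not something this diff adds); note the tests call the tutorials, which train agents, so they are long-running.

```python
# Hypothetical way to run the newly added tests programmatically; equivalent to
# invoking `pytest tests/` from the repository root. Training inside the tutorials
# makes these tests slow.
import pytest

pytest.main(["-q", "tests/test_graql.py", "tests/test_graml.py"])
```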
@@ -4,27 +4,31 @@ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs import ExpertBasedGraml

- recognizer = ExpertBasedGraml(
- domain_name=MINIGRID,
- env_name="MiniGrid-SimpleCrossingS13N4"
- )
+ def run_graml_minigrid_tutorial():
+ recognizer = ExpertBasedGraml(
+ domain_name=MINIGRID,
+ env_name="MiniGrid-SimpleCrossingS13N4"
+ )

- recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
- train_configs=[(QLEARNING, 100000) for _ in range(9)])
+ recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
+ train_configs=[(QLEARNING, 100000) for _ in range(9)])

- recognizer.goals_adaptation_phase(
- dynamic_goals = [(11,1), (11,11), (1,11)],
- dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals = [(11,1), (11,11), (1,11)],
+ dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+ )
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+
+ if __name__ == "__main__":
+ run_graml_minigrid_tutorial()
@@ -9,29 +9,33 @@ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs import GCGraml

- recognizer = GCGraml( # TODO make these tutorials into pytests
- domain_name=PANDA,
- env_name="PandaMyReachDense"
- )
- recognizer.domain_learning_phase(
- base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
- train_configs=[(SAC, 800000)]
- )
- recognizer.goals_adaptation_phase(
- dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- property_type = domain_to_env_property(PANDA)
- env_property = property_type("PandaMyReachDense")
- problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
- actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ def run_graml_panda_tutorial():
+ recognizer = GCGraml( # TODO make these tutorials into pytests
+ domain_name=PANDA,
+ env_name="PandaMyReachDense"
+ )
+ recognizer.domain_learning_phase(
+ base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
+ train_configs=[(SAC, 800000)]
+ )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
+ )
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ property_type = domain_to_env_property(PANDA)
+ env_property = property_type("PandaMyReachDense")
+ problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+ actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
+
+ if __name__ == "__main__":
+ run_graml_panda_tutorial()
@@ -5,30 +5,35 @@ from gr_libs.metrics.metrics import stochastic_amplified_selection
  from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
+ import gr_libs.environment.environment

- recognizer = GCGraml(
- domain_name=PARKING,
- env_name="Parking-S-14-PC-"
- )
-
- recognizer.domain_learning_phase(
- [i for i in range(1,21)],
- [(PPO, 200000)]
- )
- recognizer.goals_adaptation_phase(
- dynamic_goals = ["1", "11", "21"]
- # no need for expert sequence generation since GCRL is used
- )
+ def run_graml_parking_tutorial():
+ recognizer = GCGraml(
+ domain_name=PARKING,
+ env_name="Parking-S-14-PC-"
+ )
+
+ recognizer.domain_learning_phase(
+ [i for i in range(1,21)],
+ [(PPO, 200000)]
+ )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals = ["1", "11", "21"]
+ # no need for expert sequence generation since GCRL is used
+ )

- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
+
+ if __name__ == "__main__":
+ run_graml_parking_tutorial()
@@ -6,30 +6,34 @@ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml

- recognizer = ExpertBasedGraml(
- domain_name=POINT_MAZE,
- env_name="PointMaze-FourRoomsEnvDense-11x11"
- )
+ def run_graml_point_maze_tutorial():
+ recognizer = ExpertBasedGraml(
+ domain_name=POINT_MAZE,
+ env_name="PointMaze-FourRoomsEnvDense-11x11"
+ )

- recognizer.domain_learning_phase(
- [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
- [(SAC, 200000) for _ in range(8)]
- )
+ recognizer.domain_learning_phase(
+ [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
+ [(SAC, 200000) for _ in range(8)]
+ )

- recognizer.goals_adaptation_phase(
- dynamic_goals = [(4,4), (7,3), (3,7)],
- dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
- )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals = [(4,4), (7,3), (3,7)],
+ dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
+ )

- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
- closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
+ closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
+
+ if __name__ == "__main__":
+ run_graml_point_maze_tutorial()
@@ -4,26 +4,31 @@ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
  from gr_libs.ml.utils.format import random_subset_with_order
  from gr_libs import Graql

- recognizer = Graql(
- domain_name="minigrid",
- env_name="MiniGrid-SimpleCrossingS13N4"
- )
+ def run_graql_minigrid_tutorial():
+ recognizer = Graql(
+ domain_name="minigrid",
+ env_name="MiniGrid-SimpleCrossingS13N4"
+ )

- #Graql doesn't have a domain learning phase, so we skip it
+ #Graql doesn't have a domain learning phase, so we skip it

- recognizer.goals_adaptation_phase(
- dynamic_goals = [(11,1), (11,11), (1,11)],
- dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
- action_selection_method=stochastic_amplified_selection,
- random_optimalism=True, # the noise that's added to the actions
- )
+ recognizer.goals_adaptation_phase(
+ dynamic_goals = [(11,1), (11,11), (1,11)],
+ dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+ )
+ # TD3 is different from recognizer and expert algorithms, which are SAC #
+ actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+ full_sequence = actor.generate_observation(
+ action_selection_method=stochastic_amplified_selection,
+ random_optimalism=True, # the noise that's added to the actions
+ )

- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
- print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+ print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+ return closest_goal, (11,1)
+
+ if __name__ == "__main__":
+ run_graql_minigrid_tutorial()
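Unlike the GRAML tutorials, the refactored graql tutorial now returns `closest_goal, (11,1)`, which makes it possible to assert on the outcome instead of only printing it. A hedged sketch follows; the assertion and test name are illustrative, since the shipped tests/test_graql.py shown earlier only calls the function.

```python
# Illustrative only: uses the new return value of run_graql_minigrid_tutorial();
# the packaged test simply calls the function without asserting.
from tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial

def test_graql_minigrid_recognizes_goal():
    recognized_goal, actual_goal = run_graql_minigrid_tutorial()
    assert recognized_goal == actual_goal
```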
@@ -1,61 +0,0 @@
- # Recognizer Module Documentation
-
- This document provides an overview of the recognizer module, including its class hierarchy and instructions for adding a new class of recognizer.
-
- ## Class Hierarchy
-
- The recognizer module consists of an abstract base class `Recognizer` and several derived classes, each implementing specific behaviors. The main classes are:
-
- 1. **Recognizer (Abstract Base Class)**
-    - `inference_phase()` (abstract method)
-
- 2. **LearningRecognizer (Extends Recognizer)**
-    - `domain_learning_phase()`
-
- 3. **GaAgentTrainerRecognizer (Extends Recognizer)**
-    - `goals_adaptation_phase()` (abstract method)
-    - `domain_learning_phase()`
-
- 4. **GaAdaptingRecognizer (Extends Recognizer)**
-    - `goals_adaptation_phase()` (abstract method)
-
- 5. **GRAsRL (Extends Recognizer)**
-    - Implements `goals_adaptation_phase()`
-    - Implements `inference_phase()`
-
- 6. **Specific Implementations:**
-    - `Graql (Extends GRAsRL, GaAgentTrainerRecognizer)`
-    - `Draco (Extends GRAsRL, GaAgentTrainerRecognizer)`
-    - `GCDraco (Extends GRAsRL, LearningRecognizer, GaAdaptingRecognizer)`
-    - `Graml (Extends LearningRecognizer)`
-
- ## How to Add a New Recognizer Class
-
- To add a new class of recognizer, follow these steps:
-
- 1. **Determine the Type of Recognizer:**
-    - Will it require learning? Extend `LearningRecognizer`.
-    - Will it adapt goals dynamically? Extend `GaAdaptingRecognizer`.
-    - Will it train agents for new goals? Extend `GaAgentTrainerRecognizer`.
-    - Will it involve RL-based recognition? Extend `GRAsRL`.
-
- 2. **Define the Class:**
-    - Create a new class that extends the appropriate base class(es).
-    - Implement the required abstract methods (`inference_phase()`, `goals_adaptation_phase()`, etc.).
-
- 3. **Initialize the Recognizer:**
-    - Ensure proper initialization by calling `super().__init__(*args, **kwargs)`.
-    - Set up any necessary agent storage or evaluation functions.
-
- 4. **Implement Core Methods:**
-    - Define how the recognizer processes inference sequences.
-    - Implement learning or goal adaptation logic if applicable.
-
- 5. **Register the Recognizer:**
-    - Ensure it integrates properly with the existing system by using the correct `domain_to_env_property()`.
-
- 6. **Test the New Recognizer:**
-    - Run experiments to validate its behavior.
-    - Compare results against existing recognizers to ensure correctness.
-
- By following these steps, you can seamlessly integrate a new recognizer into the framework while maintaining compatibility with the existing structure.
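Although recognizer_doc.md is removed in 0.1.7.post0, the steps it describes still correspond to the classes imported from gr_libs/recognizer/recognizer.py elsewhere in this diff. As a hedged illustration only, a new recognizer following those steps might be sketched like this; the base-class names come from the removed document, the import path is an assumption, and the method bodies and signatures are placeholders rather than library code.

```python
# Hypothetical skeleton following the steps in the removed recognizer_doc.md.
# Base-class names are taken from that document; import path, signatures, and
# bodies are assumptions, not the library's implementation.
from gr_libs.recognizer.recognizer import LearningRecognizer, GaAgentTrainerRecognizer

class MyRecognizer(LearningRecognizer, GaAgentTrainerRecognizer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)  # step 3: proper initialization
        self.agents = {}  # step 3: agent storage (illustrative)

    def goals_adaptation_phase(self, dynamic_goals, dynamic_train_configs):
        # step 4: train or load an agent per new goal (placeholder)
        raise NotImplementedError

    def inference_phase(self, observation_sequence, true_goal, percentage):
        # step 4: score the observed sequence against each candidate goal (placeholder)
        raise NotImplementedError
```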