gr-libs 0.1.5__py3-none-any.whl → 0.1.7.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gr_libs/__init__.py +5 -1
- gr_libs/_version.py +21 -0
- gr_libs/environment/__init__.py +2 -2
- gr_libs/environment/environment.py +1 -1
- gr_libs/metrics/metrics.py +1 -2
- gr_libs/ml/tabular/tabular_q_learner.py +1 -1
- gr_libs/ml/utils/storage.py +7 -0
- gr_libs/problems/__init__.py +0 -0
- gr_libs/problems/consts.py +1244 -0
- gr_libs/recognizer/graml/graml_recognizer.py +21 -12
- gr_libs/recognizer/recognizer.py +0 -1
- {gr_libs-0.1.5.dist-info → gr_libs-0.1.7.post0.dist-info}/METADATA +22 -1
- {gr_libs-0.1.5.dist-info → gr_libs-0.1.7.post0.dist-info}/RECORD +22 -18
- {gr_libs-0.1.5.dist-info → gr_libs-0.1.7.post0.dist-info}/WHEEL +1 -1
- {gr_libs-0.1.5.dist-info → gr_libs-0.1.7.post0.dist-info}/top_level.txt +1 -0
- tests/test_graml.py +16 -0
- tests/test_graql.py +4 -0
- tutorials/graml_minigrid_tutorial.py +25 -21
- tutorials/graml_panda_tutorial.py +29 -25
- tutorials/graml_parking_tutorial.py +29 -24
- tutorials/graml_point_maze_tutorial.py +27 -23
- tutorials/graql_minigrid_tutorial.py +25 -20
- gr_libs/recognizer/recognizer_doc.md +0 -61
@@ -82,14 +82,14 @@ class Graml(LearningRecognizer):
|
|
82
82
|
dev_loader=DataLoader(dev_dataset, batch_size=self.env_prop.get_lstm_props().batch_size, shuffle=False, collate_fn=self.collate_func))
|
83
83
|
save_weights(model=self.model, path=self.model_file_path)
|
84
84
|
|
85
|
-
def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty]):
|
85
|
+
def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty], save_fig=False):
|
86
86
|
self.is_first_inf_since_new_goals = True
|
87
87
|
self.current_goals = dynamic_goals
|
88
88
|
# start by training each rl agent on the base goal set
|
89
89
|
self.embeddings_dict = {} # relevant if the embedding of the plan occurs during the goals adaptation phase
|
90
90
|
self.plans_dict = {} # relevant if the embedding of the plan occurs during the inference phase
|
91
91
|
for goal in self.current_goals:
|
92
|
-
obss = self.generate_sequences_library(goal)
|
92
|
+
obss = self.generate_sequences_library(goal, save_fig=save_fig)
|
93
93
|
self.plans_dict[str(goal)] = obss
|
94
94
|
|
95
95
|
def get_goal_plan(self, goal):
|
@@ -150,7 +150,7 @@ class Graml(LearningRecognizer):
|
|
150
150
|
return closest_goal
|
151
151
|
|
152
152
|
@abstractmethod
|
153
|
-
def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
|
153
|
+
def generate_sequences_library(self, goal: str, save_fig=False) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
|
154
154
|
pass
|
155
155
|
|
156
156
|
# this function duplicates every sequence and creates a consecutive and non-consecutive version of it
|
@@ -192,10 +192,10 @@ class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
|
|
192
192
|
super().__init__(*args, **kwargs)
|
193
193
|
if self.rl_agent_type==None: self.rl_agent_type = TabularQLearner
|
194
194
|
|
195
|
-
def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
|
195
|
+
def generate_sequences_library(self, goal: str, save_fig=False) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
|
196
196
|
problem_name = self.env_prop.goal_to_problem_str(goal)
|
197
197
|
img_path = os.path.join(get_policy_sequences_result_path(self.env_prop.domain_name, recognizer=self.__class__.__name__), problem_name + "_MCTS")
|
198
|
-
return mcts_model.plan(self.env_prop.name, problem_name, goal, save_fig=
|
198
|
+
return mcts_model.plan(self.env_prop.name, problem_name, goal, save_fig=save_fig, img_path=img_path, env_prop=self.env_prop)
|
199
199
|
|
200
200
|
class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
|
201
201
|
def __init__(self, *args, **kwargs):
|
@@ -206,15 +206,23 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
|
|
206
206
|
else:
|
207
207
|
self.rl_agent_type = DeepRLAgent
|
208
208
|
|
209
|
-
def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
|
209
|
+
def generate_sequences_library(self, goal: str, save_fig=False) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
|
210
210
|
problem_name = self.env_prop.goal_to_problem_str(goal)
|
211
211
|
kwargs = {"domain_name":self.domain_name, "problem_name":problem_name}
|
212
212
|
if self.dynamic_train_configs_dict[problem_name][0] != None: kwargs["algorithm"] = self.dynamic_train_configs_dict[problem_name][0]
|
213
213
|
if self.dynamic_train_configs_dict[problem_name][1] != None: kwargs["num_timesteps"] = self.dynamic_train_configs_dict[problem_name][1]
|
214
214
|
agent = self.rl_agent_type(**kwargs)
|
215
215
|
agent.learn()
|
216
|
-
|
217
|
-
|
216
|
+
agent_kwargs = {
|
217
|
+
"action_selection_method": metrics.greedy_selection,
|
218
|
+
"random_optimalism": False,
|
219
|
+
"save_fig": save_fig,
|
220
|
+
"env_prop": self.env_prop
|
221
|
+
}
|
222
|
+
if save_fig:
|
223
|
+
fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_bg_sequence")
|
224
|
+
agent_kwargs["fig_path"] = fig_path
|
225
|
+
return [agent.generate_observation(**agent_kwargs)]
|
218
226
|
|
219
227
|
def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
|
220
228
|
self.dynamic_goals_problems = [self.env_prop.goal_to_problem_str(g) for g in dynamic_goals]
|
@@ -244,20 +252,21 @@ class GCGraml(Graml, GaAdaptingRecognizer):
|
|
244
252
|
gc_agent.learn()
|
245
253
|
self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))
|
246
254
|
|
247
|
-
def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
|
255
|
+
def generate_sequences_library(self, goal: str, save_fig=False) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
|
248
256
|
problem_name = self.env_prop.goal_to_problem_str(goal)
|
249
257
|
kwargs = {"domain_name":self.domain_name, "problem_name":self.env_prop.name} # problem name is env name in gc case
|
250
258
|
if self.original_train_configs[0][0] != None: kwargs["algorithm"] = self.original_train_configs[0][0]
|
251
259
|
if self.original_train_configs[0][1] != None: kwargs["num_timesteps"] = self.original_train_configs[0][1]
|
252
260
|
agent = self.rl_agent_type(**kwargs)
|
253
261
|
agent.learn()
|
254
|
-
fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_gc_sequence")
|
255
262
|
agent_kwargs = {
|
256
263
|
"action_selection_method": metrics.stochastic_amplified_selection,
|
257
264
|
"random_optimalism": True,
|
258
|
-
"save_fig":
|
259
|
-
"fig_path": fig_path
|
265
|
+
"save_fig": save_fig
|
260
266
|
}
|
267
|
+
if save_fig:
|
268
|
+
fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_gc_sequence")
|
269
|
+
agent_kwargs["fig_path"] = fig_path
|
261
270
|
if self.env_prop.use_goal_directed_problem(): agent_kwargs["goal_directed_problem"] = problem_name
|
262
271
|
else: agent_kwargs["goal_directed_goal"] = goal
|
263
272
|
obss = []
|
gr_libs/recognizer/recognizer.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: gr_libs
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.7.post0
|
4
4
|
Summary: Package with goal recognition frameworks baselines
|
5
5
|
Author: Ben Nageris
|
6
6
|
Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
|
@@ -17,6 +17,7 @@ Requires-Dist: torchvision
|
|
17
17
|
Requires-Dist: rl_zoo3
|
18
18
|
Requires-Dist: stable_baselines3[extra]
|
19
19
|
Requires-Dist: sb3_contrib
|
20
|
+
Requires-Dist: pytest
|
20
21
|
Provides-Extra: minigrid
|
21
22
|
Requires-Dist: gr_envs[minigrid]; extra == "minigrid"
|
22
23
|
Provides-Extra: highway
|
@@ -111,6 +112,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin
|
|
111
112
|
|
112
113
|
Tutorials demonstrating basic ODGR scenarios are available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.
|
113
114
|
|
115
|
+
## Working with an initial dataset of trained agents
|
116
|
+
gr_libs also includes a library of trained agents for the various supported environments within the package.
|
117
|
+
To get the dataset of trained agents, you can run:
|
118
|
+
```sh
|
119
|
+
python download_dataset.py
|
120
|
+
```
|
121
|
+
|
122
|
+
An alternative is to use our docker image, which includes the dataset in it.
|
123
|
+
You can:
|
124
|
+
1. pull the image:
|
125
|
+
```sh
|
126
|
+
docker pull ghcr.io/MatanShamir1/gr_test_base:latest
|
127
|
+
```
|
128
|
+
2. run a container:
|
129
|
+
```sh
|
130
|
+
docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
|
131
|
+
```
|
132
|
+
3. Don't forget to install the package from within the container; return to the 'Setup' section for instructions.
|
133
|
+
|
114
134
|
### Method 1: Writing a Custom Script
|
115
135
|
|
116
136
|
1. **Create a recognizer**
|
@@ -118,6 +138,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
|
|
118
138
|
Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.
|
119
139
|
|
120
140
|
```python
|
141
|
+
import gr_libs.environment # Triggers gym env registration - you must run it!
|
121
142
|
recognizer = Graql(
|
122
143
|
domain_name="minigrid",
|
123
144
|
env_name="MiniGrid-SimpleCrossingS13N4"
|
@@ -7,13 +7,14 @@ evaluation/generate_experiments_results_new_ver2.py,sha256=jeKj_wgdM50o2vi8WZI-s
|
|
7
7
|
evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icwIg4_dsleFjEH6eiQCcUBj6WU,15286
|
8
8
|
evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
|
9
9
|
evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
|
10
|
-
gr_libs/__init__.py,sha256
|
11
|
-
gr_libs/
|
12
|
-
gr_libs/environment/
|
10
|
+
gr_libs/__init__.py,sha256=WlSRpZIpz5GxLNk96nhympbk3Z5nsMiSOyiAWj17S88,280
|
11
|
+
gr_libs/_version.py,sha256=Zy3HQFB_Viry2Rl81-7LPU4kL2FTQegnwLvl0VxTs3E,526
|
12
|
+
gr_libs/environment/__init__.py,sha256=KlRp3qdgxEmej31zDoDsYPwbcAqyDglx6x0mH0KRmHU,1032
|
13
|
+
gr_libs/environment/environment.py,sha256=d6ZbiAQ4H1aLrUFI8sm0BN9DVW3JtzpkodSi_70Z_PY,6780
|
13
14
|
gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
15
|
gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
|
15
16
|
gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
gr_libs/metrics/metrics.py,sha256=
|
17
|
+
gr_libs/metrics/metrics.py,sha256=4bnvs5suv-QrK9i1NuOzkE_E8uIzS1nlEazNDRXvZGs,8700
|
17
18
|
gr_libs/ml/__init__.py,sha256=jrjxYqvSRgWwFWw7XQP9DzOwvmprMZ2umwT7t-DYtDU,233
|
18
19
|
gr_libs/ml/agent.py,sha256=DSnK8nRx9SS76fAOZZEEvA68_meLjzm9lfQpMUXmGQU,1957
|
19
20
|
gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
|
@@ -34,30 +35,33 @@ gr_libs/ml/sequential/__init__.py,sha256=rusN4ahTvAeAq1Saz6qS_9HEU7WuXDJu2zwhc9W
|
|
34
35
|
gr_libs/ml/sequential/lstm_model.py,sha256=Vzm-C1URR84PGNEecj69GUtn3ZmOVyh1BAY6CUnfL1Q,8978
|
35
36
|
gr_libs/ml/tabular/__init__.py,sha256=jAfjfTFZLLlVm1KUiJdxdnaNGFp1J2KBU89q_vvradM,177
|
36
37
|
gr_libs/ml/tabular/state.py,sha256=8xroKF3y3nRX0LK1QX5fRT2PS2WmvcDPp0UvPFdSx2A,733
|
37
|
-
gr_libs/ml/tabular/tabular_q_learner.py,sha256=
|
38
|
+
gr_libs/ml/tabular/tabular_q_learner.py,sha256=5QU9ZWC-Cu5jxv5K1TohoRjQrRDhCgTs1Mt18cqM_Sc,18970
|
38
39
|
gr_libs/ml/tabular/tabular_rl_agent.py,sha256=7w8PYbKi8QgxHJyECWU_rURtT89spg0tHIMI1cDwYc8,3764
|
39
40
|
gr_libs/ml/utils/__init__.py,sha256=qH3pcnem5Z6rkQ4RTZi47AXJRe1RkFEST_-DrBmfWcM,258
|
40
41
|
gr_libs/ml/utils/env.py,sha256=AWVN0OXYmFU-J3FUiwvEAIY93Suf1oL6VNcxtyWJraM,171
|
41
42
|
gr_libs/ml/utils/format.py,sha256=nu7RzVwn_raG_fqqmnqlJgUjtA0yzKztkB3a5QZnRYo,3071
|
42
43
|
gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
|
43
44
|
gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
|
44
|
-
gr_libs/ml/utils/storage.py,sha256=
|
45
|
+
gr_libs/ml/utils/storage.py,sha256=52wR2pgFmcCOhqbu5Km3tegjAmtI1Fb4HYAVUnUubZk,4626
|
46
|
+
gr_libs/problems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
|
+
gr_libs/problems/consts.py,sha256=ON7yfKTAKETg7i3okDYuOzEU7KWvynyubl0m7TlU6Hs,38808
|
45
48
|
gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
46
|
-
gr_libs/recognizer/recognizer.py,sha256=
|
47
|
-
gr_libs/recognizer/recognizer_doc.md,sha256=RnTvbZhl2opvU7-QT4pULCV5HCdJTw2dsu8WQOOiR3E,2521
|
49
|
+
gr_libs/recognizer/recognizer.py,sha256=ZrApJVdBQxKRYhhDiWLCNGmlxgi674nwgb30BgVggC8,1705
|
48
50
|
gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
49
51
|
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
|
50
52
|
gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
53
|
gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
|
52
|
-
gr_libs/recognizer/graml/graml_recognizer.py,sha256=
|
54
|
+
gr_libs/recognizer/graml/graml_recognizer.py,sha256=1xLl1gHj3JxWhHtV9h3SvsW7oJdxsQQV0F-VLtlTmKQ,15911
|
53
55
|
gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
|
54
56
|
gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
|
55
|
-
|
56
|
-
|
57
|
-
tutorials/
|
58
|
-
tutorials/
|
59
|
-
tutorials/
|
60
|
-
|
61
|
-
|
62
|
-
gr_libs-0.1.
|
63
|
-
gr_libs-0.1.
|
57
|
+
tests/test_graml.py,sha256=ZJB2jqtf4Q2-KZredkJq90teqmHBIvigCAQpvR5G110,559
|
58
|
+
tests/test_graql.py,sha256=-onMi13e2wStOmB5bYv2f3Ita3QFFiw416XMBkby0OI,141
|
59
|
+
tutorials/graml_minigrid_tutorial.py,sha256=ONvxFi79R7d8dcd6gy083Z_yy9A2flhGTDIDRxurdx8,1782
|
60
|
+
tutorials/graml_panda_tutorial.py,sha256=wtv_lsw0vsU7j45GKeWecTfE7jzfh4iVGEVnQyaWthM,2063
|
61
|
+
tutorials/graml_parking_tutorial.py,sha256=M6bt1WQOOgn8_CRyG2kjxF14PMeyXVAWRDq1ZRwGTXo,1808
|
62
|
+
tutorials/graml_point_maze_tutorial.py,sha256=mYq3IxYbf9jidq-4VdT3MdStV80Q5lytFv6Xzzn22Ys,1835
|
63
|
+
tutorials/graql_minigrid_tutorial.py,sha256=Jb0TCUhiZQkFeafJWUTPnCISd4FKfPrqP-xfHiqCGKE,1635
|
64
|
+
gr_libs-0.1.7.post0.dist-info/METADATA,sha256=aS7y9Nl1JErXYdpAHstuQP_W1QMcKMbGet6IfxfJ_Do,9620
|
65
|
+
gr_libs-0.1.7.post0.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
|
66
|
+
gr_libs-0.1.7.post0.dist-info/top_level.txt,sha256=fJQF8Q8Dfh_D3pA2mhNodazNjzW6b3oWfnx6Jdo-pBU,35
|
67
|
+
gr_libs-0.1.7.post0.dist-info/RECORD,,
|
tests/test_graml.py
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial
|
2
|
+
from tutorials.graml_panda_tutorial import run_graml_panda_tutorial
|
3
|
+
from tutorials.graml_parking_tutorial import run_graml_parking_tutorial
|
4
|
+
from tutorials.graml_point_maze_tutorial import run_graml_point_maze_tutorial
|
5
|
+
|
6
|
+
def test_graml_minigrid_tutorial():
|
7
|
+
run_graml_minigrid_tutorial()
|
8
|
+
|
9
|
+
def test_graml_panda_tutorial():
|
10
|
+
run_graml_panda_tutorial()
|
11
|
+
|
12
|
+
def test_graml_parking_tutorial():
|
13
|
+
run_graml_parking_tutorial()
|
14
|
+
|
15
|
+
def test_graml_point_maze_tutorial():
|
16
|
+
run_graml_point_maze_tutorial()
|
tests/test_graql.py
ADDED
@@ -4,27 +4,31 @@ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
|
|
4
4
|
from gr_libs.ml.utils.format import random_subset_with_order
|
5
5
|
from gr_libs import ExpertBasedGraml
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
def run_graml_minigrid_tutorial():
|
8
|
+
recognizer = ExpertBasedGraml(
|
9
|
+
domain_name=MINIGRID,
|
10
|
+
env_name="MiniGrid-SimpleCrossingS13N4"
|
11
|
+
)
|
11
12
|
|
12
|
-
recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
|
13
|
-
|
13
|
+
recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
|
14
|
+
train_configs=[(QLEARNING, 100000) for _ in range(9)])
|
14
15
|
|
15
|
-
recognizer.goals_adaptation_phase(
|
16
|
-
|
17
|
-
|
18
|
-
)
|
19
|
-
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
20
|
-
actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
|
21
|
-
actor.learn()
|
22
|
-
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
23
|
-
full_sequence = actor.generate_observation(
|
24
|
-
|
25
|
-
|
26
|
-
)
|
16
|
+
recognizer.goals_adaptation_phase(
|
17
|
+
dynamic_goals = [(11,1), (11,11), (1,11)],
|
18
|
+
dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
|
19
|
+
)
|
20
|
+
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
21
|
+
actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
|
22
|
+
actor.learn()
|
23
|
+
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
24
|
+
full_sequence = actor.generate_observation(
|
25
|
+
action_selection_method=stochastic_amplified_selection,
|
26
|
+
random_optimalism=True, # the noise that's added to the actions
|
27
|
+
)
|
27
28
|
|
28
|
-
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
|
29
|
-
closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
|
30
|
-
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
|
29
|
+
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
|
30
|
+
closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
|
31
|
+
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
|
32
|
+
|
33
|
+
if __name__ == "__main__":
|
34
|
+
run_graml_minigrid_tutorial()
|
@@ -9,29 +9,33 @@ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
|
|
9
9
|
from gr_libs.ml.utils.format import random_subset_with_order
|
10
10
|
from gr_libs import GCGraml
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
)
|
20
|
-
|
21
|
-
|
22
|
-
)
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
actor
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
12
|
+
def run_graml_panda_tutorial():
|
13
|
+
recognizer = GCGraml( # TODO make these tutorials into pytests
|
14
|
+
domain_name=PANDA,
|
15
|
+
env_name="PandaMyReachDense"
|
16
|
+
)
|
17
|
+
recognizer.domain_learning_phase(
|
18
|
+
base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
|
19
|
+
train_configs=[(SAC, 800000)]
|
20
|
+
)
|
21
|
+
recognizer.goals_adaptation_phase(
|
22
|
+
dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
|
23
|
+
)
|
24
|
+
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
25
|
+
property_type = domain_to_env_property(PANDA)
|
26
|
+
env_property = property_type("PandaMyReachDense")
|
27
|
+
problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
|
28
|
+
actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
|
29
|
+
actor.learn()
|
30
|
+
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
31
|
+
full_sequence = actor.generate_observation(
|
32
|
+
action_selection_method=stochastic_amplified_selection,
|
33
|
+
random_optimalism=True, # the noise that's added to the actions
|
34
|
+
)
|
34
35
|
|
35
|
-
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
|
36
|
-
closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
|
37
|
-
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
|
36
|
+
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
|
37
|
+
closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
|
38
|
+
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
|
39
|
+
|
40
|
+
if __name__ == "__main__":
|
41
|
+
run_graml_panda_tutorial()
|
@@ -5,30 +5,35 @@ from gr_libs.metrics.metrics import stochastic_amplified_selection
|
|
5
5
|
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
|
6
6
|
from gr_libs.ml.utils.format import random_subset_with_order
|
7
7
|
from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
|
8
|
+
import gr_libs.environment.environment
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
)
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
10
|
+
def run_graml_parking_tutorial():
|
11
|
+
recognizer = GCGraml(
|
12
|
+
domain_name=PARKING,
|
13
|
+
env_name="Parking-S-14-PC-"
|
14
|
+
)
|
15
|
+
|
16
|
+
recognizer.domain_learning_phase(
|
17
|
+
[i for i in range(1,21)],
|
18
|
+
[(PPO, 200000)]
|
19
|
+
)
|
20
|
+
recognizer.goals_adaptation_phase(
|
21
|
+
dynamic_goals = ["1", "11", "21"]
|
22
|
+
# no need for expert sequence generation since GCRL is used
|
23
|
+
)
|
22
24
|
|
23
|
-
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
24
|
-
actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
|
25
|
-
actor.learn()
|
26
|
-
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
27
|
-
full_sequence = actor.generate_observation(
|
28
|
-
|
29
|
-
|
30
|
-
)
|
25
|
+
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
26
|
+
actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
|
27
|
+
actor.learn()
|
28
|
+
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
29
|
+
full_sequence = actor.generate_observation(
|
30
|
+
action_selection_method=stochastic_amplified_selection,
|
31
|
+
random_optimalism=True, # the noise that's added to the actions
|
32
|
+
)
|
31
33
|
|
32
|
-
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
|
33
|
-
closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
|
34
|
-
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
|
34
|
+
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
|
35
|
+
closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
|
36
|
+
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
|
37
|
+
|
38
|
+
if __name__ == "__main__":
|
39
|
+
run_graml_parking_tutorial()
|
@@ -6,30 +6,34 @@ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
|
|
6
6
|
from gr_libs.ml.utils.format import random_subset_with_order
|
7
7
|
from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
def run_graml_point_maze_tutorial():
|
10
|
+
recognizer = ExpertBasedGraml(
|
11
|
+
domain_name=POINT_MAZE,
|
12
|
+
env_name="PointMaze-FourRoomsEnvDense-11x11"
|
13
|
+
)
|
13
14
|
|
14
|
-
recognizer.domain_learning_phase(
|
15
|
-
|
16
|
-
|
17
|
-
)
|
15
|
+
recognizer.domain_learning_phase(
|
16
|
+
[(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
|
17
|
+
[(SAC, 200000) for _ in range(8)]
|
18
|
+
)
|
18
19
|
|
19
|
-
recognizer.goals_adaptation_phase(
|
20
|
-
|
21
|
-
|
22
|
-
)
|
20
|
+
recognizer.goals_adaptation_phase(
|
21
|
+
dynamic_goals = [(4,4), (7,3), (3,7)],
|
22
|
+
dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
|
23
|
+
)
|
23
24
|
|
24
|
-
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
25
|
-
actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
|
26
|
-
actor.learn()
|
27
|
-
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
28
|
-
full_sequence = actor.generate_observation(
|
29
|
-
|
30
|
-
|
31
|
-
)
|
25
|
+
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
26
|
+
actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
|
27
|
+
actor.learn()
|
28
|
+
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
29
|
+
full_sequence = actor.generate_observation(
|
30
|
+
action_selection_method=stochastic_amplified_selection,
|
31
|
+
random_optimalism=True, # the noise that's added to the actions
|
32
|
+
)
|
32
33
|
|
33
|
-
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
|
34
|
-
closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
|
35
|
-
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
|
34
|
+
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
|
35
|
+
closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
|
36
|
+
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
|
37
|
+
|
38
|
+
if __name__ == "__main__":
|
39
|
+
run_graml_point_maze_tutorial()
|
@@ -4,26 +4,31 @@ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
|
|
4
4
|
from gr_libs.ml.utils.format import random_subset_with_order
|
5
5
|
from gr_libs import Graql
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
def run_graql_minigrid_tutorial():
|
8
|
+
recognizer = Graql(
|
9
|
+
domain_name="minigrid",
|
10
|
+
env_name="MiniGrid-SimpleCrossingS13N4"
|
11
|
+
)
|
11
12
|
|
12
|
-
#Graql doesn't have a domain learning phase, so we skip it
|
13
|
+
#Graql doesn't have a domain learning phase, so we skip it
|
13
14
|
|
14
|
-
recognizer.goals_adaptation_phase(
|
15
|
-
|
16
|
-
|
17
|
-
)
|
18
|
-
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
19
|
-
actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
|
20
|
-
actor.learn()
|
21
|
-
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
22
|
-
full_sequence = actor.generate_observation(
|
23
|
-
|
24
|
-
|
25
|
-
)
|
15
|
+
recognizer.goals_adaptation_phase(
|
16
|
+
dynamic_goals = [(11,1), (11,11), (1,11)],
|
17
|
+
dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
|
18
|
+
)
|
19
|
+
# TD3 is different from recognizer and expert algorithms, which are SAC #
|
20
|
+
actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
|
21
|
+
actor.learn()
|
22
|
+
# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
|
23
|
+
full_sequence = actor.generate_observation(
|
24
|
+
action_selection_method=stochastic_amplified_selection,
|
25
|
+
random_optimalism=True, # the noise that's added to the actions
|
26
|
+
)
|
26
27
|
|
27
|
-
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
|
28
|
-
closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
|
29
|
-
print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
|
28
|
+
partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
|
29
|
+
closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
|
30
|
+
print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
|
31
|
+
return closest_goal, (11,1)
|
32
|
+
|
33
|
+
if __name__ == "__main__":
|
34
|
+
run_graql_minigrid_tutorial()
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# Recognizer Module Documentation
|
2
|
-
|
3
|
-
This document provides an overview of the recognizer module, including its class hierarchy and instructions for adding a new class of recognizer.
|
4
|
-
|
5
|
-
## Class Hierarchy
|
6
|
-
|
7
|
-
The recognizer module consists of an abstract base class `Recognizer` and several derived classes, each implementing specific behaviors. The main classes are:
|
8
|
-
|
9
|
-
1. **Recognizer (Abstract Base Class)**
|
10
|
-
- `inference_phase()` (abstract method)
|
11
|
-
|
12
|
-
2. **LearningRecognizer (Extends Recognizer)**
|
13
|
-
- `domain_learning_phase()`
|
14
|
-
|
15
|
-
3. **GaAgentTrainerRecognizer (Extends Recognizer)**
|
16
|
-
- `goals_adaptation_phase()` (abstract method)
|
17
|
-
- `domain_learning_phase()`
|
18
|
-
|
19
|
-
4. **GaAdaptingRecognizer (Extends Recognizer)**
|
20
|
-
- `goals_adaptation_phase()` (abstract method)
|
21
|
-
|
22
|
-
5. **GRAsRL (Extends Recognizer)**
|
23
|
-
- Implements `goals_adaptation_phase()`
|
24
|
-
- Implements `inference_phase()`
|
25
|
-
|
26
|
-
6. **Specific Implementations:**
|
27
|
-
- `Graql (Extends GRAsRL, GaAgentTrainerRecognizer)`
|
28
|
-
- `Draco (Extends GRAsRL, GaAgentTrainerRecognizer)`
|
29
|
-
- `GCDraco (Extends GRAsRL, LearningRecognizer, GaAdaptingRecognizer)`
|
30
|
-
- `Graml (Extends LearningRecognizer)`
|
31
|
-
|
32
|
-
## How to Add a New Recognizer Class
|
33
|
-
|
34
|
-
To add a new class of recognizer, follow these steps:
|
35
|
-
|
36
|
-
1. **Determine the Type of Recognizer:**
|
37
|
-
- Will it require learning? Extend `LearningRecognizer`.
|
38
|
-
- Will it adapt goals dynamically? Extend `GaAdaptingRecognizer`.
|
39
|
-
- Will it train agents for new goals? Extend `GaAgentTrainerRecognizer`.
|
40
|
-
- Will it involve RL-based recognition? Extend `GRAsRL`.
|
41
|
-
|
42
|
-
2. **Define the Class:**
|
43
|
-
- Create a new class that extends the appropriate base class(es).
|
44
|
-
- Implement the required abstract methods (`inference_phase()`, `goals_adaptation_phase()`, etc.).
|
45
|
-
|
46
|
-
3. **Initialize the Recognizer:**
|
47
|
-
- Ensure proper initialization by calling `super().__init__(*args, **kwargs)`.
|
48
|
-
- Set up any necessary agent storage or evaluation functions.
|
49
|
-
|
50
|
-
4. **Implement Core Methods:**
|
51
|
-
- Define how the recognizer processes inference sequences.
|
52
|
-
- Implement learning or goal adaptation logic if applicable.
|
53
|
-
|
54
|
-
5. **Register the Recognizer:**
|
55
|
-
- Ensure it integrates properly with the existing system by using the correct `domain_to_env_property()`.
|
56
|
-
|
57
|
-
6. **Test the New Recognizer:**
|
58
|
-
- Run experiments to validate its behavior.
|
59
|
-
- Compare results against existing recognizers to ensure correctness.
|
60
|
-
|
61
|
-
By following these steps, you can seamlessly integrate a new recognizer into the framework while maintaining compatibility with the existing structure.
|