gr-libs 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +277 -0
  2. evaluation/create_minigrid_map_image.py +34 -0
  3. evaluation/file_system.py +42 -0
  4. evaluation/generate_experiments_results.py +92 -0
  5. evaluation/generate_experiments_results_new_ver1.py +254 -0
  6. evaluation/generate_experiments_results_new_ver2.py +331 -0
  7. evaluation/generate_task_specific_statistics_plots.py +272 -0
  8. evaluation/get_plans_images.py +47 -0
  9. evaluation/increasing_and_decreasing_.py +63 -0
  10. gr_libs/__init__.py +2 -0
  11. gr_libs/environment/__init__.py +0 -0
  12. gr_libs/environment/environment.py +227 -0
  13. gr_libs/environment/utils/__init__.py +0 -0
  14. gr_libs/environment/utils/utils.py +17 -0
  15. gr_libs/metrics/__init__.py +0 -0
  16. gr_libs/metrics/metrics.py +224 -0
  17. gr_libs/ml/__init__.py +6 -0
  18. gr_libs/ml/agent.py +56 -0
  19. gr_libs/ml/base/__init__.py +1 -0
  20. gr_libs/ml/base/rl_agent.py +54 -0
  21. gr_libs/ml/consts.py +22 -0
  22. gr_libs/ml/neural/__init__.py +3 -0
  23. gr_libs/ml/neural/deep_rl_learner.py +395 -0
  24. gr_libs/ml/neural/utils/__init__.py +2 -0
  25. gr_libs/ml/neural/utils/dictlist.py +33 -0
  26. gr_libs/ml/neural/utils/penv.py +57 -0
  27. gr_libs/ml/planner/__init__.py +0 -0
  28. gr_libs/ml/planner/mcts/__init__.py +0 -0
  29. gr_libs/ml/planner/mcts/mcts_model.py +330 -0
  30. gr_libs/ml/planner/mcts/utils/__init__.py +2 -0
  31. gr_libs/ml/planner/mcts/utils/node.py +33 -0
  32. gr_libs/ml/planner/mcts/utils/tree.py +102 -0
  33. gr_libs/ml/sequential/__init__.py +1 -0
  34. gr_libs/ml/sequential/lstm_model.py +192 -0
  35. gr_libs/ml/tabular/__init__.py +3 -0
  36. gr_libs/ml/tabular/state.py +21 -0
  37. gr_libs/ml/tabular/tabular_q_learner.py +453 -0
  38. gr_libs/ml/tabular/tabular_rl_agent.py +126 -0
  39. gr_libs/ml/utils/__init__.py +6 -0
  40. gr_libs/ml/utils/env.py +7 -0
  41. gr_libs/ml/utils/format.py +100 -0
  42. gr_libs/ml/utils/math.py +13 -0
  43. gr_libs/ml/utils/other.py +24 -0
  44. gr_libs/ml/utils/storage.py +127 -0
  45. gr_libs/recognizer/__init__.py +0 -0
  46. gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
  47. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +102 -0
  48. gr_libs/recognizer/graml/__init__.py +0 -0
  49. gr_libs/recognizer/graml/gr_dataset.py +134 -0
  50. gr_libs/recognizer/graml/graml_recognizer.py +266 -0
  51. gr_libs/recognizer/recognizer.py +46 -0
  52. gr_libs/recognizer/utils/__init__.py +1 -0
  53. gr_libs/recognizer/utils/format.py +13 -0
  54. gr_libs-0.1.3.dist-info/METADATA +197 -0
  55. gr_libs-0.1.3.dist-info/RECORD +62 -0
  56. gr_libs-0.1.3.dist-info/WHEEL +5 -0
  57. gr_libs-0.1.3.dist-info/top_level.txt +3 -0
  58. tutorials/graml_minigrid_tutorial.py +30 -0
  59. tutorials/graml_panda_tutorial.py +32 -0
  60. tutorials/graml_parking_tutorial.py +38 -0
  61. tutorials/graml_point_maze_tutorial.py +43 -0
  62. tutorials/graql_minigrid_tutorial.py +29 -0
gr_libs/recognizer/graml/graml_recognizer.py
@@ -0,0 +1,266 @@
+ from abc import ABC, abstractmethod
+ from collections import namedtuple
+ import os
+ from gr_libs.environment.environment import EnvProperty, GCEnvProperty, LSTMProperties
+ from gr_libs.ml import utils
+ from gr_libs.ml.base import ContextualAgent
+ from typing import List, Tuple
+ import numpy as np
+ from torch.utils.data import DataLoader
+ from torch.nn.utils.rnn import pad_sequence
+ import torch
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
+ from gr_libs.ml.planner.mcts import mcts_model
+ import dill
+ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+ from gr_libs.recognizer.graml.gr_dataset import GRDataset, generate_datasets
+ from gr_libs.ml.sequential.lstm_model import LstmObservations, train_metric_model
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs.ml.utils.storage import get_and_create, get_lstm_model_dir, get_embeddings_result_path, get_policy_sequences_result_path
+ from gr_libs.metrics import metrics
+ from gr_libs.recognizer.recognizer import GaAdaptingRecognizer, GaAgentTrainerRecognizer, LearningRecognizer, Recognizer # import first, very dependent
+
+ ### TODO IMPLEMENT MORE SELECTION METHODS, MAKE SURE action_probs IS AS IT SEEMS: a list of action-probability pairs ###
+
+ def collate_fn(batch):
+     first_traces, second_traces, is_same_goals = zip(*batch)
+     # torch.stack takes tensor tuples (fixed size) and stacks them up in a matrix
+     first_traces_padded = pad_sequence([torch.stack(sequence) for sequence in first_traces], batch_first=True)
+     second_traces_padded = pad_sequence([torch.stack(sequence) for sequence in second_traces], batch_first=True)
+     first_traces_lengths = [len(trace) for trace in first_traces]
+     second_traces_lengths = [len(trace) for trace in second_traces]
+     return first_traces_padded.to(utils.device), second_traces_padded.to(utils.device), torch.stack(is_same_goals).to(utils.device), first_traces_lengths, second_traces_lengths
+
+ def load_weights(loaded_model : LstmObservations, path):
+     # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     loaded_model.load_state_dict(torch.load(path, map_location=utils.device))
+     loaded_model.to(utils.device) # Ensure model is on the right device
+     return loaded_model
+
+ def save_weights(model : LstmObservations, path):
+     directory = os.path.dirname(path)
+     if not os.path.exists(directory):
+         os.makedirs(directory)
+     torch.save(model.state_dict(), path)
+
+ class Graml(LearningRecognizer):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.agents: List[ContextualAgent] = []
+         self.train_func = train_metric_model; self.collate_func = collate_fn
+
+     @abstractmethod
+     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+         pass
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+         super().domain_learning_phase(base_goals, train_configs)
+         self.train_agents_on_base_goals(base_goals, train_configs)
+         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
+         self.model_directory = get_lstm_model_dir(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, model_name=self.env_prop.problem_list_to_str_tuple(self.original_problems), recognizer=self.__class__.__name__)
+         last_path = r"lstm_model.pth"
+         self.model_file_path = os.path.join(self.model_directory, last_path)
+         self.model = LstmObservations(input_size=self.env_prop.get_lstm_props().input_size, hidden_size=self.env_prop.get_lstm_props().hidden_size)
+         self.model.to(utils.device)
+
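+         # reuse a cached metric model if one was already trained for this goal set; otherwise build a dataset from the base agents' traces and train the LSTM from scratch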
+         if os.path.exists(self.model_file_path):
+             print(f"Loading pre-existing lstm model in {self.model_file_path}")
+             load_weights(loaded_model=self.model, path=self.model_file_path)
+         else:
+             print(f"{self.model_file_path} doesn't exist, training the model")
+             train_samples, dev_samples = generate_datasets(num_samples=self.env_prop.get_lstm_props().num_samples,
+                                                            agents=self.agents,
+                                                            observation_creation_method=metrics.stochastic_amplified_selection,
+                                                            problems=self.original_problems,
+                                                            env_prop=self.env_prop,
+                                                            gc_goal_set=self.gc_goal_set if hasattr(self, 'gc_goal_set') else None,
+                                                            recognizer_name=self.__class__.__name__)
+
+             train_dataset = GRDataset(len(train_samples), train_samples)
+             dev_dataset = GRDataset(len(dev_samples), dev_samples)
+             self.train_func(self.model, train_loader=DataLoader(train_dataset, batch_size=self.env_prop.get_lstm_props().batch_size, shuffle=False, collate_fn=self.collate_func),
+                             dev_loader=DataLoader(dev_dataset, batch_size=self.env_prop.get_lstm_props().batch_size, shuffle=False, collate_fn=self.collate_func))
+             save_weights(model=self.model, path=self.model_file_path)
+
+     def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty]):
+         self.is_first_inf_since_new_goals = True
+         self.current_goals = dynamic_goals
+         # build a sequence library for every new (dynamic) goal
+         self.embeddings_dict = {} # relevant if the embedding of the plan occurs during the goals adaptation phase
+         self.plans_dict = {} # relevant if the embedding of the plan occurs during the inference phase
+         for goal in self.current_goals:
+             obss = self.generate_sequences_library(goal)
+             self.plans_dict[str(goal)] = obss
+
+     def get_goal_plan(self, goal):
+         assert self.plans_dict, "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't return the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+         return self.plans_dict[goal]
+
+     def dump_plans(self, true_sequence, true_goal, percentage):
+         assert self.plans_dict, "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't return the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+         # Arrange storage
+         embeddings_path = get_and_create(get_embeddings_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__))
+         self.plans_dict[f"{true_goal}_true"] = true_sequence
+
+         with open(embeddings_path + f'/{true_goal}_{percentage}_plans_dict.pkl', 'wb') as plans_file:
+             # TODO erase AGENT_BASED macros
+             to_dump = {}
+             for goal, obss in self.plans_dict.items():
+                 if goal == f"{true_goal}_true":
+                     to_dump[goal] = self.agents[0].agent.simplify_observation(obss)
+                 else:
+                     to_dump[goal] = []
+                     for obs in obss:
+                         addition = self.agents[0].agent.simplify_observation(obs) if self.is_first_inf_since_new_goals else obs
+                         to_dump[goal].append(addition)
+             dill.dump(to_dump, plans_file)
+         self.plans_dict.pop(f"{true_goal}_true")
+
+     def create_embeddings_dict(self):
+         for goal, obss in self.plans_dict.items():
+             self.embeddings_dict[goal] = []
+             for (cons_seq, non_cons_seq) in obss:
+                 self.embeddings_dict[goal].append((self.model.embed_sequence(cons_seq), self.model.embed_sequence(non_cons_seq)))
+
+     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+         embeddings_path = get_and_create(get_embeddings_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__))
+         simplified_inf_sequence = self.agents[0].agent.simplify_observation(inf_sequence)
+         new_embedding = self.model.embed_sequence(simplified_inf_sequence)
+         assert self.plans_dict, "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't embed the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+         if self.is_first_inf_since_new_goals:
+             self.is_first_inf_since_new_goals = False
+             self.update_sequences_library_inference_phase(inf_sequence)
+             self.create_embeddings_dict()
+
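+         # score each candidate goal by the mean, over its library sequences, of exp(-L1 distance) between the observed-sequence embedding and the closer of the two library embeddings (consecutive / non-consecutive); the goal with the greatest mean similarity is returned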
+         closest_goal, greatest_similarity = None, 0
+         for (goal, embeddings) in self.embeddings_dict.items():
+             sum_curr_similarities = 0
+             for cons_embedding, non_cons_embedding in embeddings:
+                 sum_curr_similarities += max(torch.exp(-torch.sum(torch.abs(cons_embedding-new_embedding))), torch.exp(-torch.sum(torch.abs(non_cons_embedding-new_embedding))))
+             mean_similarity = sum_curr_similarities/len(embeddings)
+             if mean_similarity > greatest_similarity:
+                 closest_goal = goal
+                 greatest_similarity = mean_similarity
+
+         self.embeddings_dict[f"{true_goal}_true"] = new_embedding
+         if self.collect_statistics:
+             with open(os.path.join(embeddings_path, f'{true_goal}_{percentage}_embeddings_dict.pkl'), 'wb') as embeddings_file:
+                 dill.dump(self.embeddings_dict, embeddings_file)
+         self.embeddings_dict.pop(f"{true_goal}_true")
+
+         return closest_goal
+
+     @abstractmethod
+     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         pass
+
+     # this function duplicates every sequence and creates a consecutive and non-consecutive version of it
+     def update_sequences_library_inference_phase(self, inf_sequence) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         new_plans_dict = {}
+         for goal, obss in self.plans_dict.items():
+             new_obss = []
+             for obs in obss:
+                 consecutive_partial_obs = random_subset_with_order(obs, len(inf_sequence), is_consecutive=True)
+                 non_consecutive_partial_obs = random_subset_with_order(obs, len(inf_sequence), is_consecutive=False)
+                 simplified_consecutive_partial_obs = self.agents[0].agent.simplify_observation(consecutive_partial_obs)
+                 simplified_non_consecutive_partial_obs = self.agents[0].agent.simplify_observation(non_consecutive_partial_obs)
+                 new_obss.append((simplified_consecutive_partial_obs, simplified_non_consecutive_partial_obs))
+             new_plans_dict[goal] = new_obss # override old full observations with new partial observations with consecutive and non-consecutive versions.
+         self.plans_dict = new_plans_dict
+
+ class BGGraml(Graml):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+         assert len(train_configs) == len(base_goals), "There should be train configs for every goal in BGGraml."
+         return super().domain_learning_phase(base_goals, train_configs)
+
+     # In this case we need a goal-directed agent for every goal
+     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+         self.original_problems = [self.env_prop.goal_to_problem_str(g) for g in base_goals]
+         # start by training each rl agent on the base goal set
+         for (problem, goal), (algorithm, num_timesteps) in zip(zip(self.original_problems, base_goals), train_configs):
+             kwargs = {"domain_name":self.domain_name, "problem_name":problem}
+             if algorithm != None: kwargs["algorithm"] = algorithm
+             if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
+             agent = self.rl_agent_type(**kwargs)
+             agent.learn()
+             self.agents.append(ContextualAgent(problem_name=problem, problem_goal=goal, agent=agent))
+
+ class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         if self.rl_agent_type==None: self.rl_agent_type = TabularQLearner
+
+     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         problem_name = self.env_prop.goal_to_problem_str(goal)
+         img_path = os.path.join(get_policy_sequences_result_path(self.env_prop.domain_name, recognizer=self.__class__.__name__), problem_name + "_MCTS")
+         return mcts_model.plan(self.env_prop.name, problem_name, goal, save_fig=True, img_path=img_path, env_prop=self.env_prop)
+
+ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         if self.rl_agent_type==None:
+             if self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete():
+                 self.rl_agent_type = TabularQLearner
+             else:
+                 self.rl_agent_type = DeepRLAgent
+
+     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         problem_name = self.env_prop.goal_to_problem_str(goal)
+         kwargs = {"domain_name":self.domain_name, "problem_name":problem_name}
+         if self.dynamic_train_configs_dict[problem_name][0] != None: kwargs["algorithm"] = self.dynamic_train_configs_dict[problem_name][0]
+         if self.dynamic_train_configs_dict[problem_name][1] != None: kwargs["num_timesteps"] = self.dynamic_train_configs_dict[problem_name][1]
+         agent = self.rl_agent_type(**kwargs)
+         agent.learn()
+         fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_bg_sequence")
+         return [agent.generate_observation(action_selection_method=metrics.greedy_selection, random_optimalism=False, save_fig=True, fig_path=fig_path, env_prop=self.env_prop)]
+
+     def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+         self.dynamic_goals_problems = [self.env_prop.goal_to_problem_str(g) for g in dynamic_goals]
+         self.dynamic_train_configs_dict = {problem:config for problem, config in zip(self.dynamic_goals_problems,dynamic_train_configs)}
+         return super().goals_adaptation_phase(dynamic_goals)
+
+ class GCGraml(Graml, GaAdaptingRecognizer):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         if self.rl_agent_type==None: self.rl_agent_type = GCDeepRLAgent
+         assert self.env_prop.gc_adaptable() and not self.env_prop.is_state_discrete() and not self.env_prop.is_action_discrete()
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+         assert len(train_configs) == 1, "There should be one train config for the sole gc agent in GCGraml."
+         return super().domain_learning_phase(base_goals, train_configs)
+
+     # In this case a single goal-conditioned agent covers all base goals
+     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+         self.gc_goal_set = base_goals
+         self.original_problems = self.env_prop.name # needed for gr_dataset
+         # train a single goal-conditioned rl agent on the base goal set
+         kwargs = {"domain_name":self.domain_name, "problem_name":self.env_prop.name}
+         algorithm, num_timesteps = train_configs[0] # should only be one, was asserted
+         if algorithm != None: kwargs["algorithm"] = algorithm
+         if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
+         gc_agent = self.rl_agent_type(**kwargs)
+         gc_agent.learn()
+         self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent)) # TODO change
+
+     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         problem_name = self.env_prop.goal_to_problem_str(goal)
+         kwargs = {"domain_name":self.domain_name, "problem_name":self.env_prop.name} # problem name is env name in gc case
+         if self.original_train_configs[0][0] != None: kwargs["algorithm"] = self.original_train_configs[0][0]
+         if self.original_train_configs[0][1] != None: kwargs["num_timesteps"] = self.original_train_configs[0][1]
+         agent = self.rl_agent_type(**kwargs)
+         agent.learn()
+         fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_gc_sequence")
+         agent_kwargs = {
+             "action_selection_method": metrics.stochastic_amplified_selection,
+             "random_optimalism": True,
+             "save_fig": True,
+             "fig_path": fig_path
+         }
+         if self.env_prop.use_goal_directed_problem(): agent_kwargs["goal_directed_problem"] = problem_name
+         else: agent_kwargs["goal_directed_goal"] = goal
+         obss = []
+         for _ in range(5): obss.append(agent.generate_observation(**agent_kwargs))
+         return obss
gr_libs/recognizer/recognizer.py
@@ -0,0 +1,46 @@
+ from abc import ABC, abstractmethod
+ from typing import List, Type
+
+ from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
+ from gr_libs.environment.utils.utils import domain_to_env_property
+ from gr_libs.ml.base.rl_agent import RLAgent
+
+ class Recognizer(ABC):
+     def __init__(self, domain_name: str, env_name:str, collect_statistics=False, rl_agent_type: Type[RLAgent]=None):
+         assert domain_name in SUPPORTED_DOMAINS
+         self.rl_agent_type = rl_agent_type
+         self.domain_name = domain_name
+         self.env_prop_type = domain_to_env_property(self.domain_name)
+         self.env_prop = self.env_prop_type(env_name)
+         self.collect_statistics = collect_statistics
+
+     @abstractmethod
+     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+         pass
+
+ class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all classes
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+         self.original_train_configs = train_configs
+
+ # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
+ class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all classes
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     @abstractmethod
+     def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+         pass
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+         super().domain_learning_phase(base_goals, train_configs)
+
+ class GaAdaptingRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all classes
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     @abstractmethod
+     def goals_adaptation_phase(self, dynamic_goals: List[str]):
+         pass
gr_libs/recognizer/utils/__init__.py
@@ -0,0 +1 @@
+ from .format import recognizer_str_to_obj
gr_libs/recognizer/utils/format.py
@@ -0,0 +1,13 @@
+ from gr_libs.recognizer.graml.graml_recognizer import GCGraml, ExpertBasedGraml, MCTSBasedGraml
+ from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco
+
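+ # maps a recognizer class name (e.g. one parsed from command-line arguments) to the class itself; unknown names yield None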
+ def recognizer_str_to_obj(recognizer_str: str):
+     recognizer_map = {
+         "GCGraml": GCGraml,
+         "ExpertBasedGraml": ExpertBasedGraml,
+         "MCTSBasedGraml": MCTSBasedGraml,
+         "Graql": Graql,
+         "Draco": Draco,
+         "GCDraco": GCDraco
+     }
+     return recognizer_map.get(recognizer_str)
gr_libs-0.1.3.dist-info/METADATA
@@ -0,0 +1,197 @@
+ Metadata-Version: 2.4
+ Name: gr_libs
+ Version: 0.1.3
+ Summary: Package with goal recognition frameworks baselines
+ Author: Osher Elhadad, Ben Nageris
+ Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>
+ License-Expression: MIT
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+ Requires-Dist: gr_envs
+ Requires-Dist: dill
+ Requires-Dist: opencv-python
+
+ # GRLib
+ GRLib is a Python package containing implementations of Goal Recognition (GR) algorithms that use MDPs to represent the decision-making process. All agents in these algorithms interact with an environment registered in the Gym API.
+ ## Setup:
+ If you're on Linux, you're set. If you're on Windows, use Git Bash for the following commands to work.
+ 1. Find where your Python is installed. For example, to locate your Python 3.12 installation, run:
+ ```sh
+ py -3.12 -c "import sys; print(sys.executable)"
+ ```
+ 2. Create a new empty venv using that Python's venv module:
+ ```sh
+ C:/Users/path/to/Programs/Python/Python312/python.exe -m venv test_env
+ ```
+ 3. Activate the environment:
+ ```sh
+ source test_env/Scripts/activate
+ ```
+ 4. There's no equivalent of `conda env list` for checking the status of global virtual environments, so verify the active one via:
+ ```sh
+ echo $VIRTUAL_ENV
+ ```
+ 5. Install and upgrade basic package management modules:
+ ```sh
+ /path/to/python.exe -m pip install --upgrade pip setuptools wheel versioneer
+ ```
+ 6. Install the GoalRecognitionLibs package (add -e for editable mode):
+ ```sh
+ cd /path/to/clone/of/GoalRecognitionLibs
+ pip install -e .
+ ```
+ 7. Install the gr_libs package (add -e for editable mode):
+ ```sh
+ cd /path/to/clone/of/Grlib
+ pip install -e .
+ ```
+
+
+ <!-- 1. Ensure you have python 3.11 installed.
+ If you have root permissions, simply use:
+ ```sh
+ mkdir -p ~/.local/python3.11
+ dnf install python3.11 --prefix ~/.local/python3.11
+ echo 'export PATH=$HOME/.local/python3.11/bin:$PATH' >> ~/.bashrc
+ source ~/.bashrc
+ ```
+ Else, use pyenv:
+ ```sh
+ pyenv install 3.11.0
+ ```
+ 2. Create a new venv or use an existing 3.11 venv, and activate it. To create a new venv:
+ ```sh
+ ~/.pyenv/versions/3.11.0/bin/python -m venv graml_env
+ ./Python-3.11.0/graml_env/bin/activate
+ ```
+ If you're not a sudo, and you have problems with building python getting such warnings:
+ ```sh
+ WARNING: The Python ctypes extension was not compiled. Missing the libffi lib?
+ ```
+ That means you don't have the necesarry libraries for building python, and you probably can't change that since you're not a sudoer.
+ An alternative solution can be using a conda env:
+ ```sh
+ conda create -n graml_env python=3.11
+ conda activate graml_env
+ ```
+ 3. Install GoalRecognitionLibs to get all needed dependencies:
+ ```sh
+ git clone [GoalRecognitionLibs address]
+ cd GoalRecognitionLibs
+ pip install -e . # using the conda's pip of course
+ ``` -->
+
+ ### Issues & Problems ###
+ If you're not a sudoer and you run into problems building Python, with warnings such as:
+ ```sh
+ WARNING: The Python ctypes extension was not compiled. Missing the libffi lib?
+ ```
+ it means you're missing the libraries necessary for building Python.
+
+ ### How to use Grlib ###
+ Now that you've installed the package, you have additional custom Gym environments and you can start creating an ODGR scenario with the algorithm you wish to test.
+ The tutorial at tutorials/graml_point_maze_tutorial.py follows a simple ODGR scenario. It walks through the initialization and deployment process, following an example where GRAML is expected to adapt to new emerging goals in the point_maze Gym environment.
+
+ #### Method 1: write your own script
+ 1. Create the recognizer: we need to state the base problems on which the recognizer trains.
+ We also need the env_name for storing the trained models.
+ Other notable parameters configure the training of the model: for example, GRAML's LSTM input size must match the size of the concatenation of the state space and the action space.
+
+ ```python
+ recognizer = Graml(
+     env_name="point_maze", # TODO change to macros which are importable from some info or env module of enums.
+     problems=[("PointMaze-FourRoomsEnvDense-11x11-Goal-9x1"),
+               ("PointMaze-FourRoomsEnv-11x11-Goal-9x9"), # this one doesn't work with dense rewards because of encountering local minima
+               ("PointMaze-FourRoomsEnvDense-11x11-Goal-1x9"),
+               ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x3"),
+               ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x4"),
+               ("PointMaze-FourRoomsEnvDense-11x11-Goal-8x2"),
+               ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"),
+               ("PointMaze-FourRoomsEnvDense-11x11-Goal-2x8")],
+     task_str_to_goal=maze_str_to_goal,
+     method=DeepRLAgent,
+     collect_statistics=False,
+     train_configs=[(SAC, 200000) for i in range(8)],
+     partial_obs_type="fragmented",
+     batch_size=32,
+     input_size=6,
+     hidden_size=8,
+     num_samples=20000,
+     problem_list_to_str_tuple=lambda problems: "_".join([f"[{s.split('-')[-1]}]" for s in problems]),
+     is_learn_same_length_sequences=False,
+     goals_adaptation_sequence_generation_method=AGENT_BASED # take expert samples in goals adaptation phase
+ )
+ ```
+
+ 2. The domain learning phase: in GRAML's case, the recognizer generates a dataset by training agents towards the base goals, and trains a metric model, built around an LSTM, on traces generated by those agents.
+
+ ```python
+ recognizer.domain_learning_phase()
+ ```
+ 3. The goals adaptation phase: the recognizer receives new goals, along with training configurations for the new agents, since the sequence generation method in this case relies on an expert.
+ ```python
+ recognizer.goals_adaptation_phase(
+     dynamic_goals_problems = ["PointMaze-FourRoomsEnvDense-11x11-Goal-4x4",
+                               "PointMaze-FourRoomsEnvDense-11x11-Goal-7x3",
+                               "PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"],
+     dynamic_train_configs=[(SAC, 200000) for i in range(3)] # for expert sequence generation
+ )
+ ```
+ 4. The inference phase: this snippet generates a partial sequence by an agent trained towards one of the goals of the inference phase. Note how the trace is generated by an agent different from the recognizer's inner agents or the expert (TD3 rather than SAC), and with noise added to every action to simulate suboptimal behavior.
+ ```python
+ actor = DeepRLAgent(env_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
+ actor.learn()
+ full_sequence = actor.generate_observation(
+     action_selection_method=stochastic_amplified_selection,
+     random_optimalism=True, # the noise that's added to the actions
+ )
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+ closest_goal = recognizer.inference_phase(partial_sequence, maze_str_to_goal("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4"), 0.5)
+ ```
+ 5. Check the result returned by GRAML and print whether it was right.
+ ```python
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
+ ```
+
+ #### Method 2: use a configuration file
+ The configuration file consts.py holds configurations of ODGR problems.
+ You can either use existing ones or add new ones.
+ Note that while the config file is easier to use at scale, some inputs to the ODGR problem are less flexible than with method 1.
+ For example, sequence generation is performed by trained agents and is not configurable; the sequences are either completely consecutive or randomly sampled from the trace.
+ Example for a problem:
+
+ You can use odgr_executor.py to execute a single task:
+ ```sh
+ python odgr_executor.py --recognizer MCTSBasedGraml --domain minigrid --task L1 --minigrid_env MinigridSimple
+ ```
+
+
+ ## Supported Algorithms
+
+ | **Name** | **Supervised** | **RL** | **Discrete** | **Continuous** | **Model-Based** | **Model-Free** | **Actions Only** |
+ | ------------------- | ------------------ | ------------------ | ------------------ | ------------------- | ------------------ | ------------------ | ------------------ |
+ | GRAQL | :x: | :heavy_check_mark: | :heavy_check_mark: | :x: | :x: | :heavy_check_mark: | :x: |
+ | DRACO | :x: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :x: | :heavy_check_mark: | :x: |
+ | GRAML | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :x: | :heavy_check_mark: | :heavy_check_mark: |
+
+ ## Supported Domains
+
+ | **Name** | **Action** | **State** |
+ | ------------------- | ------------------ | ------------------ |
+ | Minigrid | Discrete | Discrete |
+ | PointMaze | Continuous | Continuous |
+ | Parking | Continuous | Continuous |
+ | Panda | Continuous | Continuous |
+
+ ### Experiments
+ This is a guide for executing the experiments. Benchmark domains are suggested in the repository, and the 'evaluation' directory provides a series of tools to analyze them. They are set by default to the domains used for the GRAML and GRAQL analysis during the writing of the GRAML paper, but can easily be adjusted for new domains and algorithms.
+ 1. analyze_results_cross_alg_cross_domain.py: this script runs with no arguments (see the example after this list). It reads its inputs via get_experiment_results_path (for example: dataset\graml\minigrid\continuing\inference_same_seq_len\learn_diff_seq_len\experiment_results\obstacles\L111\experiment_results.pkl) and produces a plot with 4 figures showing the accuracy trends of the algorithms on the checked domains, one against the other. Currently GRAML is checked against GRAQL or DRACO, but this can easily be adjusted from within the script.
+ 2. generate_task_specific_statistics_plots.py: for a specific task execution (the results of running experiments.py), this script produces a summary consisting of a bar chart with the accuracies and confidence levels of an algorithm on the task across the varying observability percentages. figures\point_maze\obstacles\graql_point_maze_obstacles_fragmented_stats.png is an example of a path at which the output is dumped. It also produces a confusion matrix of the confidence levels, visualizing the same data; that output file resides at: figures\point_maze\obstacles\graml_point_maze_obstacles_fragmented_inference_same_seq_len_learn_diff_seq_len_goals_conf_mat.png.
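+
+ For example, the cross-domain analysis script takes no arguments (assuming it is run from the repository root with the experiment results already generated):
+ ```sh
+ python evaluation/analyze_results_cross_alg_cross_domain.py
+ ```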
+
+ ### How to add a new environment
+ 1. bla
+ 2. blalba
+
+ ### How to add a new Learner
gr_libs-0.1.3.dist-info/RECORD
@@ -0,0 +1,62 @@
+ evaluation/analyze_results_cross_alg_cross_domain.py,sha256=s_DDh4rNfRnvQ0PDa2d5411jYOa7CaI1YeB8Dpup7QU,9803
+ evaluation/create_minigrid_map_image.py,sha256=jaSW3n3tY222iFUeAMqedBP9cvD88GCzPrQ6_XHv5oQ,1242
+ evaluation/file_system.py,sha256=SSYnj8QGFkq-8V_0s7x2MWbD88aFaoFY4Ogc_Pt8m6U,1601
+ evaluation/generate_experiments_results.py,sha256=oMFt2-TX7g3O6aBflFtQ5q0PT6sngEb8104kpPVMi0s,4051
+ evaluation/generate_experiments_results_new_ver1.py,sha256=P9gz3xa0DoRRMQ16GQL3_wVSDYUfh8oZ3BCIUjphKaM,8909
+ evaluation/generate_experiments_results_new_ver2.py,sha256=jeKj_wgdM50o2vi8WZI-s3GbsQdsjultHX-8H4Xvus4,12276
+ evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icwIg4_dsleFjEH6eiQCcUBj6WU,15286
+ evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
+ evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
+ gr_libs/__init__.py,sha256=-uKsQiHIL7yojbDwlTR-I8sj1WX9XT52PoFbPjtUTKo,145
+ gr_libs/environment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/environment/environment.py,sha256=0-3kZJCmLMF9o0NignZaMEwQb94NZQ2gmsOyfjPXKDI,6919
+ gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
+ gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/metrics/metrics.py,sha256=IYPL90tuxbTRaNLaFvgzd5SMUwS8gsSANuWZcSVuKkM,8737
+ gr_libs/ml/__init__.py,sha256=jrjxYqvSRgWwFWw7XQP9DzOwvmprMZ2umwT7t-DYtDU,233
+ gr_libs/ml/agent.py,sha256=DSnK8nRx9SS76fAOZZEEvA68_meLjzm9lfQpMUXmGQU,1957
+ gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
+ gr_libs/ml/base/__init__.py,sha256=MfIYhl_UqH8H7YoTCih8wBFA_gpTOUFq8Ph0_Nq0XQk,68
+ gr_libs/ml/base/rl_agent.py,sha256=u9rnb-ma9iDM5b_BlwjcTJGSFezIGrxXINw6b-Dbl8s,1598
+ gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
+ gr_libs/ml/neural/deep_rl_learner.py,sha256=_d6LdbMPqN4qJlOI_UqSD7o0yzIa7EjRONdFSVYO_Ag,20677
+ gr_libs/ml/neural/utils/__init__.py,sha256=bJgPfRnmfDQxdnb0OyRGwzgebEc1PnlO7-GpqszPBcc,106
+ gr_libs/ml/neural/utils/dictlist.py,sha256=WpHfdWpVZ_T3PcSnOQUC--ro_tsS0dvam2WG3LcsHDw,1039
+ gr_libs/ml/neural/utils/penv.py,sha256=R1uW8sePQqvTlJjpAuMx16eDU6TuGAjQF3hTR1QasMo,1862
+ gr_libs/ml/planner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/ml/planner/mcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/ml/planner/mcts/mcts_model.py,sha256=N4B2SRWAySW7sJ1JIIkKHbzpxMYo2GcuaSB-eauJmBg,21068
+ gr_libs/ml/planner/mcts/utils/__init__.py,sha256=8OE_XolCHiWIZZwS23lqLaLd72tsHwO8cQRRToTC0Lk,45
+ gr_libs/ml/planner/mcts/utils/node.py,sha256=WXXaEjfU857yIBF8gKVjr0ZGmU2Du9s1d-dBcA4QS10,1220
+ gr_libs/ml/planner/mcts/utils/tree.py,sha256=mLtLtPoqoU0eauNEExY94px5mdbmH-HCsYAYQDZqioI,3382
+ gr_libs/ml/sequential/__init__.py,sha256=rusN4ahTvAeAq1Saz6qS_9HEU7WuXDJu2zwhc9WUEYQ,61
+ gr_libs/ml/sequential/lstm_model.py,sha256=Vzm-C1URR84PGNEecj69GUtn3ZmOVyh1BAY6CUnfL1Q,8978
+ gr_libs/ml/tabular/__init__.py,sha256=jAfjfTFZLLlVm1KUiJdxdnaNGFp1J2KBU89q_vvradM,177
+ gr_libs/ml/tabular/state.py,sha256=8xroKF3y3nRX0LK1QX5fRT2PS2WmvcDPp0UvPFdSx2A,733
+ gr_libs/ml/tabular/tabular_q_learner.py,sha256=q6Dz4RTX0GjBumUiS2mUFKvEiKUBecj0q1RpWvPvmmE,18972
+ gr_libs/ml/tabular/tabular_rl_agent.py,sha256=7w8PYbKi8QgxHJyECWU_rURtT89spg0tHIMI1cDwYc8,3764
+ gr_libs/ml/utils/__init__.py,sha256=qH3pcnem5Z6rkQ4RTZi47AXJRe1RkFEST_-DrBmfWcM,258
+ gr_libs/ml/utils/env.py,sha256=AWVN0OXYmFU-J3FUiwvEAIY93Suf1oL6VNcxtyWJraM,171
+ gr_libs/ml/utils/format.py,sha256=nu7RzVwn_raG_fqqmnqlJgUjtA0yzKztkB3a5QZnRYo,3071
+ gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
+ gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
+ gr_libs/ml/utils/storage.py,sha256=oCdvL_ypCglnSJsyyXzNyV_UJASTfioa3yJhFlFso64,4277
+ gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/recognizer/recognizer.py,sha256=ysJYOGe5OlERMAeMwclKpwqw2tQvbSvGnLZrq4qP0xk,1895
+ gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
+ gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
+ gr_libs/recognizer/graml/graml_recognizer.py,sha256=OblODjvWSi8KZ5ByvGyL5Mm1GY3IZb8yspfk81Dbebg,15721
+ gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
+ gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
+ tutorials/graml_minigrid_tutorial.py,sha256=iLs7mOYNAZ5wtxtSwiE8tvbLMIueQ5VmVmDnBBjWG_4,1589
+ tutorials/graml_panda_tutorial.py,sha256=DuHVDLe49qwgWouLxwalqdT1P4dlNOOMdgDc3ocNX5Y,1820
+ tutorials/graml_parking_tutorial.py,sha256=sQ496DNuAo9GZ_0iUZ_6Hqe5zFxIYZ_pBIHQscQvR4o,2501
+ tutorials/graml_point_maze_tutorial.py,sha256=TnLT9FdDj6AF8lm0lDIZum4ouPE5rye4RBH8z4Exj2Y,2713
+ tutorials/graql_minigrid_tutorial.py,sha256=VoXbEgL_hjQLfau6WohXxPK8rrv1VLA874F8PZ7ZtPk,1421
+ gr_libs-0.1.3.dist-info/METADATA,sha256=Jgcr7b3qVESwqOa_odcgVjqMQK4OF0doW2if37Uwe3g,10484
+ gr_libs-0.1.3.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+ gr_libs-0.1.3.dist-info/top_level.txt,sha256=k7_l789QSJEr9JrtvsRMxNoTIDwNduq8mhIN-YoPJUM,29
+ gr_libs-0.1.3.dist-info/RECORD,,
gr_libs-0.1.3.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (77.0.3)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
gr_libs-0.1.3.dist-info/top_level.txt
@@ -0,0 +1,3 @@
+ evaluation
+ gr_libs
+ tutorials
tutorials/graml_minigrid_tutorial.py
@@ -0,0 +1,30 @@
+ from gr_libs.environment.environment import QLEARNING
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs import ExpertBasedGraml
+
+ recognizer = ExpertBasedGraml(
+     domain_name="minigrid",
+     env_name="MiniGrid-SimpleCrossingS13N4"
+ )
+
+ recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
+                                  train_configs=[(QLEARNING, 100000) for _ in range(9)])
+
+ recognizer.goals_adaptation_phase(
+     dynamic_goals = [(11,1), (11,11), (1,11)],
+     dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+ )
+ # the actor is trained separately from the recognizer's agents; its observations are the input to the recognizer at inference time #
+ actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+ actor.learn()
+ # sample is generated stochastically to simulate suboptimal behavior, noise is added to the action values #
+ full_sequence = actor.generate_observation(
+     action_selection_method=stochastic_amplified_selection,
+     random_optimalism=True, # the noise that's added to the actions
+ )
+
+ partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
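+ # 0.5 is the observed fraction of the trace; the same value is passed to inference_phase, where it is used when recording statistics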
+ closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+ print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")