gr-libs 0.1.6.post1__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
  2. evaluation/create_minigrid_map_image.py +10 -6
  3. evaluation/file_system.py +16 -5
  4. evaluation/generate_experiments_results.py +123 -74
  5. evaluation/generate_experiments_results_new_ver1.py +227 -243
  6. evaluation/generate_experiments_results_new_ver2.py +317 -317
  7. evaluation/generate_task_specific_statistics_plots.py +481 -253
  8. evaluation/get_plans_images.py +41 -26
  9. evaluation/increasing_and_decreasing_.py +97 -56
  10. gr_libs/__init__.py +6 -1
  11. gr_libs/_version.py +2 -2
  12. gr_libs/environment/__init__.py +17 -9
  13. gr_libs/environment/environment.py +167 -39
  14. gr_libs/environment/utils/utils.py +22 -12
  15. gr_libs/metrics/__init__.py +5 -0
  16. gr_libs/metrics/metrics.py +76 -34
  17. gr_libs/ml/__init__.py +2 -0
  18. gr_libs/ml/agent.py +21 -6
  19. gr_libs/ml/base/__init__.py +1 -1
  20. gr_libs/ml/base/rl_agent.py +13 -10
  21. gr_libs/ml/consts.py +1 -1
  22. gr_libs/ml/neural/deep_rl_learner.py +433 -352
  23. gr_libs/ml/neural/utils/__init__.py +1 -1
  24. gr_libs/ml/neural/utils/dictlist.py +3 -3
  25. gr_libs/ml/neural/utils/penv.py +5 -2
  26. gr_libs/ml/planner/mcts/mcts_model.py +524 -302
  27. gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
  28. gr_libs/ml/planner/mcts/utils/node.py +11 -7
  29. gr_libs/ml/planner/mcts/utils/tree.py +14 -10
  30. gr_libs/ml/sequential/__init__.py +1 -1
  31. gr_libs/ml/sequential/lstm_model.py +256 -175
  32. gr_libs/ml/tabular/state.py +7 -7
  33. gr_libs/ml/tabular/tabular_q_learner.py +123 -73
  34. gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
  35. gr_libs/ml/utils/__init__.py +8 -2
  36. gr_libs/ml/utils/format.py +78 -70
  37. gr_libs/ml/utils/math.py +2 -1
  38. gr_libs/ml/utils/other.py +1 -1
  39. gr_libs/ml/utils/storage.py +95 -28
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
  42. gr_libs/recognizer/graml/gr_dataset.py +209 -110
  43. gr_libs/recognizer/graml/graml_recognizer.py +431 -231
  44. gr_libs/recognizer/recognizer.py +38 -27
  45. gr_libs/recognizer/utils/__init__.py +1 -1
  46. gr_libs/recognizer/utils/format.py +8 -3
  47. {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
  48. gr_libs-0.1.8.dist-info/RECORD +70 -0
  49. {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
  50. {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -1
  51. tests/test_gcdraco.py +10 -0
  52. tests/test_graml.py +8 -4
  53. tests/test_graql.py +2 -1
  54. tutorials/gcdraco_panda_tutorial.py +66 -0
  55. tutorials/gcdraco_parking_tutorial.py +61 -0
  56. tutorials/graml_minigrid_tutorial.py +42 -12
  57. tutorials/graml_panda_tutorial.py +35 -14
  58. tutorials/graml_parking_tutorial.py +37 -19
  59. tutorials/graml_point_maze_tutorial.py +33 -13
  60. tutorials/graql_minigrid_tutorial.py +31 -15
  61. CI/README.md +0 -12
  62. CI/docker_build_context/Dockerfile +0 -15
  63. gr_libs/recognizer/recognizer_doc.md +0 -61
  64. gr_libs-0.1.6.post1.dist-info/RECORD +0 -70
@@ -16,250 +16,450 @@ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
  from gr_libs.recognizer.graml.gr_dataset import GRDataset, generate_datasets
  from gr_libs.ml.sequential.lstm_model import LstmObservations, train_metric_model
  from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs.ml.utils.storage import get_and_create, get_lstm_model_dir, get_embeddings_result_path, get_policy_sequences_result_path
+ from gr_libs.ml.utils.storage import (
+     get_and_create,
+     get_lstm_model_dir,
+     get_embeddings_result_path,
+     get_policy_sequences_result_path,
+ )
  from gr_libs.metrics import metrics
- from gr_libs.recognizer.recognizer import GaAdaptingRecognizer, GaAgentTrainerRecognizer, LearningRecognizer, Recognizer # import first, very dependent
+ from gr_libs.recognizer.recognizer import (
+     GaAdaptingRecognizer,
+     GaAgentTrainerRecognizer,
+     LearningRecognizer,
+     Recognizer,
+ ) # import first, very dependent

  ### TODO IMPLEMENT MORE SELECTION METHODS, MAKE SURE action_probs IS AS IT SEEMS: list of action-probability 'es ###

+
  def collate_fn(batch):
-     first_traces, second_traces, is_same_goals = zip(*batch)
-     # torch.stack takes tensor tuples (fixed size) and stacks them up in a matrix
-     first_traces_padded = pad_sequence([torch.stack(sequence) for sequence in first_traces], batch_first=True)
-     second_traces_padded = pad_sequence([torch.stack(sequence) for sequence in second_traces], batch_first=True)
-     first_traces_lengths = [len(trace) for trace in first_traces]
-     second_traces_lengths = [len(trace) for trace in second_traces]
-     return first_traces_padded.to(utils.device), second_traces_padded.to(utils.device), torch.stack(is_same_goals).to(utils.device), first_traces_lengths, second_traces_lengths
-
- def load_weights(loaded_model : LstmObservations, path):
-     # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-     loaded_model.load_state_dict(torch.load(path, map_location=utils.device))
-     loaded_model.to(utils.device) # Ensure model is on the right device
-     return loaded_model
-
- def save_weights(model : LstmObservations, path):
-     directory = os.path.dirname(path)
-     if not os.path.exists(directory):
-         os.makedirs(directory)
-     torch.save(model.state_dict(), path)
+     first_traces, second_traces, is_same_goals = zip(*batch)
+     # torch.stack takes tensor tuples (fixed size) and stacks them up in a matrix
+     first_traces_padded = pad_sequence(
+         [torch.stack(sequence) for sequence in first_traces], batch_first=True
+     )
+     second_traces_padded = pad_sequence(
+         [torch.stack(sequence) for sequence in second_traces], batch_first=True
+     )
+     first_traces_lengths = [len(trace) for trace in first_traces]
+     second_traces_lengths = [len(trace) for trace in second_traces]
+     return (
+         first_traces_padded.to(utils.device),
+         second_traces_padded.to(utils.device),
+         torch.stack(is_same_goals).to(utils.device),
+         first_traces_lengths,
+         second_traces_lengths,
+     )
+
+
+ def load_weights(loaded_model: LstmObservations, path):
+     # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     loaded_model.load_state_dict(torch.load(path, map_location=utils.device))
+     loaded_model.to(utils.device) # Ensure model is on the right device
+     return loaded_model
+
+
+ def save_weights(model: LstmObservations, path):
+     directory = os.path.dirname(path)
+     if not os.path.exists(directory):
+         os.makedirs(directory)
+     torch.save(model.state_dict(), path)
+

  class Graml(LearningRecognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         self.agents: List[ContextualAgent] = []
-         self.train_func = train_metric_model; self.collate_func = collate_fn
-
-     @abstractmethod
-     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
-         pass
-
-     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-         super().domain_learning_phase(base_goals, train_configs)
-         self.train_agents_on_base_goals(base_goals, train_configs)
-         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
-         self.model_directory = get_lstm_model_dir(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name , model_name=self.env_prop.problem_list_to_str_tuple(self.original_problems), recognizer=self.__class__.__name__)
-         last_path = r"lstm_model.pth"
-         self.model_file_path = os.path.join(self.model_directory, last_path)
-         self.model = LstmObservations(input_size=self.env_prop.get_lstm_props().input_size, hidden_size=self.env_prop.get_lstm_props().hidden_size)
-         self.model.to(utils.device)
-
-         if os.path.exists(self.model_file_path):
-             print(f"Loading pre-existing lstm model in {self.model_file_path}")
-             load_weights(loaded_model=self.model, path=self.model_file_path)
-         else:
-             print(f"{self.model_file_path} doesn't exist, training the model")
-             train_samples, dev_samples = generate_datasets(num_samples=self.env_prop.get_lstm_props().num_samples,
-                 agents=self.agents,
-                 observation_creation_method=metrics.stochastic_amplified_selection,
-                 problems=self.original_problems,
-                 env_prop=self.env_prop,
-                 gc_goal_set=self.gc_goal_set if hasattr(self, 'gc_goal_set') else None,
-                 recognizer_name=self.__class__.__name__)
-
-             train_dataset = GRDataset(len(train_samples), train_samples)
-             dev_dataset = GRDataset(len(dev_samples), dev_samples)
-             self.train_func(self.model, train_loader=DataLoader(train_dataset, batch_size=self.env_prop.get_lstm_props().batch_size, shuffle=False, collate_fn=self.collate_func),
-                 dev_loader=DataLoader(dev_dataset, batch_size=self.env_prop.get_lstm_props().batch_size, shuffle=False, collate_fn=self.collate_func))
-             save_weights(model=self.model, path=self.model_file_path)
-
-     def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty]):
-         self.is_first_inf_since_new_goals = True
-         self.current_goals = dynamic_goals
-         # start by training each rl agent on the base goal set
-         self.embeddings_dict = {} # relevant if the embedding of the plan occurs during the goals adaptation phase
-         self.plans_dict = {} # relevant if the embedding of the plan occurs during the inference phase
-         for goal in self.current_goals:
-             obss = self.generate_sequences_library(goal)
-             self.plans_dict[str(goal)] = obss
-
-     def get_goal_plan(self, goal):
-         assert self.plans_dict, "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't return the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
-         return self.plans_dict[goal]
-
-     def dump_plans(self, true_sequence, true_goal, percentage):
-         assert self.plans_dict, "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't return the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
-         # Arrange storage
-         embeddings_path = get_and_create(get_embeddings_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__))
-         self.plans_dict[f"{true_goal}_true"] = true_sequence
-
-         with open(embeddings_path + f'/{true_goal}_{percentage}_plans_dict.pkl', 'wb') as plans_file:
-             to_dump = {}
-             for goal, obss in self.plans_dict.items():
-                 if goal == f"{true_goal}_true":
-                     to_dump[goal] = self.agents[0].agent.simplify_observation(obss)
-                 else:
-                     to_dump[goal] = []
-                     for obs in obss:
-                         addition = self.agents[0].agent.simplify_observation(obs) if self.is_first_inf_since_new_goals else obs
-                         to_dump[goal].append(addition)
-             dill.dump(to_dump, plans_file)
-         self.plans_dict.pop(f"{true_goal}_true")
-
-     def create_embeddings_dict(self):
-         for goal, obss in self.plans_dict.items():
-             self.embeddings_dict[goal] = []
-             for (cons_seq, non_cons_seq) in obss:
-                 self.embeddings_dict[goal].append((self.model.embed_sequence(cons_seq), self.model.embed_sequence(non_cons_seq)))
-
-     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
-         embeddings_path = get_and_create(get_embeddings_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__))
-         simplified_inf_sequence = self.agents[0].agent.simplify_observation(inf_sequence)
-         new_embedding = self.model.embed_sequence(simplified_inf_sequence)
-         assert self.plans_dict, "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't embed the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
-         if self.is_first_inf_since_new_goals:
-             self.is_first_inf_since_new_goals = False
-             self.update_sequences_library_inference_phase(inf_sequence)
-             self.create_embeddings_dict()
-
-         closest_goal, greatest_similarity = None, 0
-         for (goal, embeddings) in self.embeddings_dict.items():
-             sum_curr_similarities = 0
-             for cons_embedding, non_cons_embedding in embeddings:
-                 sum_curr_similarities += max(torch.exp(-torch.sum(torch.abs(cons_embedding-new_embedding))), torch.exp(-torch.sum(torch.abs(non_cons_embedding-new_embedding))))
-             mean_similarity = sum_curr_similarities/len(embeddings)
-             if mean_similarity > greatest_similarity:
-                 closest_goal = goal
-                 greatest_similarity = mean_similarity
-
-         self.embeddings_dict[f"{true_goal}_true"] = new_embedding
-         if self.collect_statistics:
-             with open(os.path.join(embeddings_path, f'{true_goal}_{percentage}_embeddings_dict.pkl'), 'wb') as embeddings_file:
-                 dill.dump(self.embeddings_dict, embeddings_file)
-         self.embeddings_dict.pop(f"{true_goal}_true")
-
-         return closest_goal
-
-     @abstractmethod
-     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
-         pass
-
-     # this function duplicates every sequence and creates a consecutive and non-consecutive version of it
-     def update_sequences_library_inference_phase(self, inf_sequence) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
-         new_plans_dict = {}
-         for goal, obss in self.plans_dict.items():
-             new_obss = []
-             for obs in obss:
-                 consecutive_partial_obs = random_subset_with_order(obs, len(inf_sequence), is_consecutive=True)
-                 non_consecutive_partial_obs = random_subset_with_order(obs, len(inf_sequence), is_consecutive=False)
-                 simplified_consecutive_partial_obs = self.agents[0].agent.simplify_observation(consecutive_partial_obs)
-                 simplified_non_consecutive_partial_obs = self.agents[0].agent.simplify_observation(non_consecutive_partial_obs)
-                 new_obss.append((simplified_consecutive_partial_obs, simplified_non_consecutive_partial_obs))
-             new_plans_dict[goal] = new_obss # override old full observations with new partial observations with consecutive and non-consecutive versions.
-         self.plans_dict = new_plans_dict
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.agents: List[ContextualAgent] = []
+         self.train_func = train_metric_model
+         self.collate_func = collate_fn
+
+     @abstractmethod
+     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+         pass
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+         super().domain_learning_phase(base_goals, train_configs)
+         self.train_agents_on_base_goals(base_goals, train_configs)
+         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
+         self.model_directory = get_lstm_model_dir(
+             domain_name=self.env_prop.domain_name,
+             env_name=self.env_prop.name,
+             model_name=self.env_prop.problem_list_to_str_tuple(self.original_problems),
+             recognizer=self.__class__.__name__,
+         )
+         last_path = r"lstm_model.pth"
+         self.model_file_path = os.path.join(self.model_directory, last_path)
+         self.model = LstmObservations(
+             input_size=self.env_prop.get_lstm_props().input_size,
+             hidden_size=self.env_prop.get_lstm_props().hidden_size,
+         )
+         self.model.to(utils.device)
+
+         if os.path.exists(self.model_file_path):
+             print(f"Loading pre-existing lstm model in {self.model_file_path}")
+             load_weights(loaded_model=self.model, path=self.model_file_path)
+         else:
+             print(f"{self.model_file_path} doesn't exist, training the model")
+             train_samples, dev_samples = generate_datasets(
+                 num_samples=self.env_prop.get_lstm_props().num_samples,
+                 agents=self.agents,
+                 observation_creation_method=metrics.stochastic_amplified_selection,
+                 problems=self.original_problems,
+                 env_prop=self.env_prop,
+                 gc_goal_set=self.gc_goal_set if hasattr(self, "gc_goal_set") else None,
+                 recognizer_name=self.__class__.__name__,
+             )
+
+             train_dataset = GRDataset(len(train_samples), train_samples)
+             dev_dataset = GRDataset(len(dev_samples), dev_samples)
+             self.train_func(
+                 self.model,
+                 train_loader=DataLoader(
+                     train_dataset,
+                     batch_size=self.env_prop.get_lstm_props().batch_size,
+                     shuffle=False,
+                     collate_fn=self.collate_func,
+                 ),
+                 dev_loader=DataLoader(
+                     dev_dataset,
+                     batch_size=self.env_prop.get_lstm_props().batch_size,
+                     shuffle=False,
+                     collate_fn=self.collate_func,
+                 ),
+             )
+             save_weights(model=self.model, path=self.model_file_path)
+
+     def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty], save_fig=False):
+         self.is_first_inf_since_new_goals = True
+         self.current_goals = dynamic_goals
+         # start by training each rl agent on the base goal set
+         self.embeddings_dict = (
+             {}
+         ) # relevant if the embedding of the plan occurs during the goals adaptation phase
+         self.plans_dict = (
+             {}
+         ) # relevant if the embedding of the plan occurs during the inference phase
+         for goal in self.current_goals:
+             obss = self.generate_sequences_library(goal, save_fig=save_fig)
+             self.plans_dict[str(goal)] = obss
+
+     def get_goal_plan(self, goal):
+         assert (
+             self.plans_dict
+         ), "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't return the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+         return self.plans_dict[goal]
+
+     def dump_plans(self, true_sequence, true_goal, percentage):
+         assert (
+             self.plans_dict
+         ), "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't return the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+         # Arrange storage
+         embeddings_path = get_and_create(
+             get_embeddings_result_path(
+                 domain_name=self.env_prop.domain_name,
+                 env_name=self.env_prop.name,
+                 recognizer=self.__class__.__name__,
+             )
+         )
+         self.plans_dict[f"{true_goal}_true"] = true_sequence
+
+         with open(
+             embeddings_path + f"/{true_goal}_{percentage}_plans_dict.pkl", "wb"
+         ) as plans_file:
+             to_dump = {}
+             for goal, obss in self.plans_dict.items():
+                 if goal == f"{true_goal}_true":
+                     to_dump[goal] = self.agents[0].agent.simplify_observation(obss)
+                 else:
+                     to_dump[goal] = []
+                     for obs in obss:
+                         addition = (
+                             self.agents[0].agent.simplify_observation(obs)
+                             if self.is_first_inf_since_new_goals
+                             else obs
+                         )
+                         to_dump[goal].append(addition)
+             dill.dump(to_dump, plans_file)
+         self.plans_dict.pop(f"{true_goal}_true")
+
+     def create_embeddings_dict(self):
+         for goal, obss in self.plans_dict.items():
+             self.embeddings_dict[goal] = []
+             for cons_seq, non_cons_seq in obss:
+                 self.embeddings_dict[goal].append(
+                     (
+                         self.model.embed_sequence(cons_seq),
+                         self.model.embed_sequence(non_cons_seq),
+                     )
+                 )
+
+     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+         embeddings_path = get_and_create(
+             get_embeddings_result_path(
+                 domain_name=self.env_prop.domain_name,
+                 env_name=self.env_prop.name,
+                 recognizer=self.__class__.__name__,
+             )
+         )
+         simplified_inf_sequence = self.agents[0].agent.simplify_observation(
+             inf_sequence
+         )
+         new_embedding = self.model.embed_sequence(simplified_inf_sequence)
+         assert (
+             self.plans_dict
+         ), "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't embed the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+         if self.is_first_inf_since_new_goals:
+             self.is_first_inf_since_new_goals = False
+             self.update_sequences_library_inference_phase(inf_sequence)
+             self.create_embeddings_dict()
+
+         closest_goal, greatest_similarity = None, 0
+         for goal, embeddings in self.embeddings_dict.items():
+             sum_curr_similarities = 0
+             for cons_embedding, non_cons_embedding in embeddings:
+                 sum_curr_similarities += max(
+                     torch.exp(-torch.sum(torch.abs(cons_embedding - new_embedding))),
+                     torch.exp(
+                         -torch.sum(torch.abs(non_cons_embedding - new_embedding))
+                     ),
+                 )
+             mean_similarity = sum_curr_similarities / len(embeddings)
+             if mean_similarity > greatest_similarity:
+                 closest_goal = goal
+                 greatest_similarity = mean_similarity
+
+         self.embeddings_dict[f"{true_goal}_true"] = new_embedding
+         if self.collect_statistics:
+             with open(
+                 os.path.join(
+                     embeddings_path, f"{true_goal}_{percentage}_embeddings_dict.pkl"
+                 ),
+                 "wb",
+             ) as embeddings_file:
+                 dill.dump(self.embeddings_dict, embeddings_file)
+         self.embeddings_dict.pop(f"{true_goal}_true")
+
+         return closest_goal
+
+     @abstractmethod
+     def generate_sequences_library(
+         self, goal: str, save_fig=False
+     ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         pass
+
+     # this function duplicates every sequence and creates a consecutive and non-consecutive version of it
+     def update_sequences_library_inference_phase(
+         self, inf_sequence
+     ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         new_plans_dict = {}
+         for goal, obss in self.plans_dict.items():
+             new_obss = []
+             for obs in obss:
+                 consecutive_partial_obs = random_subset_with_order(
+                     obs, len(inf_sequence), is_consecutive=True
+                 )
+                 non_consecutive_partial_obs = random_subset_with_order(
+                     obs, len(inf_sequence), is_consecutive=False
+                 )
+                 simplified_consecutive_partial_obs = self.agents[
+                     0
+                 ].agent.simplify_observation(consecutive_partial_obs)
+                 simplified_non_consecutive_partial_obs = self.agents[
+                     0
+                 ].agent.simplify_observation(non_consecutive_partial_obs)
+                 new_obss.append(
+                     (
+                         simplified_consecutive_partial_obs,
+                         simplified_non_consecutive_partial_obs,
+                     )
+                 )
+             new_plans_dict[goal] = (
+                 new_obss # override old full observations with new partial observations with consecutive and non-consecutive versions.
+             )
+         self.plans_dict = new_plans_dict
+

  class BGGraml(Graml):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-
-     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-         assert len(train_configs) == len(base_goals), "There should be train configs for every goal in BGGraml."
-         return super().domain_learning_phase(base_goals, train_configs)
-
-     # In case we need goal-directed agent for every goal
-     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
-         self.original_problems = [self.env_prop.goal_to_problem_str(g) for g in base_goals]
-         # start by training each rl agent on the base goal set
-         for (problem, goal), (algorithm, num_timesteps) in zip(zip(self.original_problems, base_goals), train_configs):
-             kwargs = {"domain_name":self.domain_name, "problem_name":problem}
-             if algorithm != None: kwargs["algorithm"] = algorithm
-             if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
-             agent = self.rl_agent_type(**kwargs)
-             agent.learn()
-             self.agents.append(ContextualAgent(problem_name=problem, problem_goal=goal, agent=agent))
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+         assert len(train_configs) == len(
+             base_goals
+         ), "There should be train configs for every goal in BGGraml."
+         return super().domain_learning_phase(base_goals, train_configs)
+
+     # In case we need goal-directed agent for every goal
+     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+         self.original_problems = [
+             self.env_prop.goal_to_problem_str(g) for g in base_goals
+         ]
+         # start by training each rl agent on the base goal set
+         for (problem, goal), (algorithm, num_timesteps) in zip(
+             zip(self.original_problems, base_goals), train_configs
+         ):
+             kwargs = {
+                 "domain_name": self.domain_name,
+                 "problem_name": problem,
+                 "env_prop": self.env_prop,
+             }
+             if algorithm != None:
+                 kwargs["algorithm"] = algorithm
+             if num_timesteps != None:
+                 kwargs["num_timesteps"] = num_timesteps
+             agent = self.rl_agent_type(**kwargs)
+             agent.learn()
+             self.agents.append(
+                 ContextualAgent(problem_name=problem, problem_goal=goal, agent=agent)
+             )
+

  class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         if self.rl_agent_type==None: self.rl_agent_type = TabularQLearner
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         if self.rl_agent_type == None:
+             self.rl_agent_type = TabularQLearner
+
+     def generate_sequences_library(
+         self, goal: str, save_fig=False
+     ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         problem_name = self.env_prop.goal_to_problem_str(goal)
+         img_path = os.path.join(
+             get_policy_sequences_result_path(
+                 self.env_prop.domain_name, recognizer=self.__class__.__name__
+             ),
+             problem_name + "_MCTS",
+         )
+         return mcts_model.plan(
+             self.env_prop.name,
+             problem_name,
+             goal,
+             save_fig=save_fig,
+             img_path=img_path,
+             env_prop=self.env_prop,
+         )

-     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
-         problem_name = self.env_prop.goal_to_problem_str(goal)
-         img_path = os.path.join(get_policy_sequences_result_path(self.env_prop.domain_name, recognizer=self.__class__.__name__), problem_name + "_MCTS")
-         return mcts_model.plan(self.env_prop.name, problem_name, goal, save_fig=True, img_path=img_path, env_prop=self.env_prop)

  class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         if self.rl_agent_type==None:
-             if self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete():
-                 self.rl_agent_type = TabularQLearner
-             else:
-                 self.rl_agent_type = DeepRLAgent
-
-     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
-         problem_name = self.env_prop.goal_to_problem_str(goal)
-         kwargs = {"domain_name":self.domain_name, "problem_name":problem_name}
-         if self.dynamic_train_configs_dict[problem_name][0] != None: kwargs["algorithm"] = self.dynamic_train_configs_dict[problem_name][0]
-         if self.dynamic_train_configs_dict[problem_name][1] != None: kwargs["num_timesteps"] = self.dynamic_train_configs_dict[problem_name][1]
-         agent = self.rl_agent_type(**kwargs)
-         agent.learn()
-         fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_bg_sequence")
-         return [agent.generate_observation(action_selection_method=metrics.greedy_selection, random_optimalism=False, save_fig=True, fig_path=fig_path, env_prop=self.env_prop)]
-
-     def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
-         self.dynamic_goals_problems = [self.env_prop.goal_to_problem_str(g) for g in dynamic_goals]
-         self.dynamic_train_configs_dict = {problem:config for problem, config in zip(self.dynamic_goals_problems,dynamic_train_configs)}
-         return super().goals_adaptation_phase(dynamic_goals)
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         if self.rl_agent_type == None:
+             if self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete():
+                 self.rl_agent_type = TabularQLearner
+             else:
+                 self.rl_agent_type = DeepRLAgent
+
+     def generate_sequences_library(
+         self, goal: str, save_fig=False
+     ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         problem_name = self.env_prop.goal_to_problem_str(goal)
+         kwargs = {
+             "domain_name": self.domain_name,
+             "problem_name": problem_name,
+             "env_prop": self.env_prop,
+         }
+         if self.dynamic_train_configs_dict[problem_name][0] != None:
+             kwargs["algorithm"] = self.dynamic_train_configs_dict[problem_name][0]
+         if self.dynamic_train_configs_dict[problem_name][1] != None:
+             kwargs["num_timesteps"] = self.dynamic_train_configs_dict[problem_name][1]
+         agent = self.rl_agent_type(**kwargs)
+         agent.learn()
+         agent_kwargs = {
+             "action_selection_method": metrics.greedy_selection,
+             "random_optimalism": False,
+             "save_fig": save_fig,
+         }
+         if save_fig:
+             fig_path = get_and_create(
+                 f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_bg_sequence"
+             )
+             agent_kwargs["fig_path"] = fig_path
+         return [agent.generate_observation(**agent_kwargs)]
+
+     def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+         self.dynamic_goals_problems = [
+             self.env_prop.goal_to_problem_str(g) for g in dynamic_goals
+         ]
+         self.dynamic_train_configs_dict = {
+             problem: config
+             for problem, config in zip(
+                 self.dynamic_goals_problems, dynamic_train_configs
+             )
+         }
+         return super().goals_adaptation_phase(dynamic_goals)
+

  class GCGraml(Graml, GaAdaptingRecognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         if self.rl_agent_type==None: self.rl_agent_type = GCDeepRLAgent
-         assert self.env_prop.gc_adaptable() and not self.env_prop.is_state_discrete() and not self.env_prop.is_action_discrete()
-
-     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-         assert len(train_configs) == 1, "There should be one train config for the sole gc agent in GCGraml."
-         return super().domain_learning_phase(base_goals, train_configs)
-
-     # In case we need goal-directed agent for every goal
-     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
-         self.gc_goal_set = base_goals
-         self.original_problems = self.env_prop.name # needed for gr_dataset
-         # start by training each rl agent on the base goal set
-         kwargs = {"domain_name":self.domain_name, "problem_name":self.env_prop.name}
-         algorithm, num_timesteps = train_configs[0] # should only be one, was asserted
-         if algorithm != None: kwargs["algorithm"] = algorithm
-         if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
-         gc_agent = self.rl_agent_type(**kwargs)
-         gc_agent.learn()
-         self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))
-
-     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
-         problem_name = self.env_prop.goal_to_problem_str(goal)
-         kwargs = {"domain_name":self.domain_name, "problem_name":self.env_prop.name} # problem name is env name in gc case
-         if self.original_train_configs[0][0] != None: kwargs["algorithm"] = self.original_train_configs[0][0]
-         if self.original_train_configs[0][1] != None: kwargs["num_timesteps"] = self.original_train_configs[0][1]
-         agent = self.rl_agent_type(**kwargs)
-         agent.learn()
-         fig_path = get_and_create(f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_gc_sequence")
-         agent_kwargs = {
-             "action_selection_method": metrics.stochastic_amplified_selection,
-             "random_optimalism": True,
-             "save_fig": True,
-             "fig_path": fig_path
-         }
-         if self.env_prop.use_goal_directed_problem(): agent_kwargs["goal_directed_problem"] = problem_name
-         else: agent_kwargs["goal_directed_goal"] = goal
-         obss = []
-         for _ in range(5): obss.append(agent.generate_observation(**agent_kwargs))
-         return obss
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         if self.rl_agent_type == None:
+             self.rl_agent_type = GCDeepRLAgent
+         assert (
+             self.env_prop.gc_adaptable()
+             and not self.env_prop.is_state_discrete()
+             and not self.env_prop.is_action_discrete()
+         )
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+         assert (
+             len(train_configs) == 1
+         ), "There should be one train config for the sole gc agent in GCGraml."
+         return super().domain_learning_phase(base_goals, train_configs)
+
+     # In case we need goal-directed agent for every goal
+     def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+         self.gc_goal_set = base_goals
+         self.original_problems = self.env_prop.name # needed for gr_dataset
+         # start by training each rl agent on the base goal set
+         kwargs = {
+             "domain_name": self.domain_name,
+             "problem_name": self.env_prop.name,
+             "env_prop": self.env_prop,
+         }
+         algorithm, num_timesteps = train_configs[0] # should only be one, was asserted
+         if algorithm != None:
+             kwargs["algorithm"] = algorithm
+         if num_timesteps != None:
+             kwargs["num_timesteps"] = num_timesteps
+         gc_agent = self.rl_agent_type(**kwargs)
+         gc_agent.learn()
+         self.agents.append(
+             ContextualAgent(
+                 problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent
+             )
+         )
+
+     def generate_sequences_library(
+         self, goal: str, save_fig=False
+     ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+         problem_name = self.env_prop.goal_to_problem_str(goal)
+         kwargs = {
+             "domain_name": self.domain_name,
+             "problem_name": self.env_prop.name,
+             "env_prop": self.env_prop,
+         } # problem name is env name in gc case
+         if self.original_train_configs[0][0] != None:
+             kwargs["algorithm"] = self.original_train_configs[0][0]
+         if self.original_train_configs[0][1] != None:
+             kwargs["num_timesteps"] = self.original_train_configs[0][1]
+         agent = self.rl_agent_type(**kwargs)
+         agent.learn()
+         agent_kwargs = {
+             "action_selection_method": metrics.stochastic_amplified_selection,
+             "random_optimalism": True,
+             "save_fig": save_fig,
+         }
+         if save_fig:
+             fig_path = get_and_create(
+                 f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_gc_sequence"
+             )
+             agent_kwargs["fig_path"] = fig_path
+         if self.env_prop.use_goal_directed_problem():
+             agent_kwargs["goal_directed_problem"] = problem_name
+         else:
+             agent_kwargs["goal_directed_goal"] = goal
+         obss = []
+         for _ in range(5):
+             obss.append(agent.generate_observation(**agent_kwargs))
+         return obss
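
For reference, the reformatted collate_fn near the top of this diff pads variable-length traces before batching. The standalone sketch below reproduces that padding logic with plain PyTorch so the returned shapes are easy to see; the toy traces, the feature size of 4, and the CPU device (standing in for gr_libs' utils.device) are illustrative assumptions and are not taken from this diff.

import torch
from torch.nn.utils.rnn import pad_sequence

device = torch.device("cpu")  # stand-in for utils.device used by the real collate_fn

# Two toy samples of (first_trace, second_trace, is_same_goal); traces have unequal lengths.
batch = [
    ([torch.randn(4), torch.randn(4), torch.randn(4)], [torch.randn(4)], torch.tensor(1.0)),
    ([torch.randn(4)], [torch.randn(4), torch.randn(4)], torch.tensor(0.0)),
]

first_traces, second_traces, is_same_goals = zip(*batch)
# Stack each trace into a (length, features) tensor, then pad to the longest trace in the batch.
first_padded = pad_sequence([torch.stack(seq) for seq in first_traces], batch_first=True)
second_padded = pad_sequence([torch.stack(seq) for seq in second_traces], batch_first=True)

print(first_padded.shape)   # torch.Size([2, 3, 4])
print(second_padded.shape)  # torch.Size([2, 2, 4])
print(torch.stack(is_same_goals).to(device))            # tensor([1., 0.])
print([len(t) for t in first_traces], [len(t) for t in second_traces])  # [3, 1] [1, 2]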