gr-libs 0.1.8__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (73)
  1. gr_libs/__init__.py +3 -1
  2. gr_libs/_version.py +2 -2
  3. gr_libs/all_experiments.py +260 -0
  4. gr_libs/environment/__init__.py +14 -1
  5. gr_libs/environment/_utils/__init__.py +0 -0
  6. gr_libs/environment/{utils → _utils}/utils.py +1 -1
  7. gr_libs/environment/environment.py +278 -23
  8. gr_libs/evaluation/__init__.py +1 -0
  9. gr_libs/evaluation/generate_experiments_results.py +100 -0
  10. gr_libs/metrics/__init__.py +2 -0
  11. gr_libs/metrics/metrics.py +166 -31
  12. gr_libs/ml/__init__.py +1 -6
  13. gr_libs/ml/base/__init__.py +3 -1
  14. gr_libs/ml/base/rl_agent.py +68 -3
  15. gr_libs/ml/neural/__init__.py +1 -3
  16. gr_libs/ml/neural/deep_rl_learner.py +241 -84
  17. gr_libs/ml/neural/utils/__init__.py +1 -2
  18. gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +1 -1
  19. gr_libs/ml/planner/mcts/mcts_model.py +71 -34
  20. gr_libs/ml/sequential/__init__.py +0 -1
  21. gr_libs/ml/sequential/{lstm_model.py → _lstm_model.py} +11 -14
  22. gr_libs/ml/tabular/__init__.py +1 -3
  23. gr_libs/ml/tabular/tabular_q_learner.py +27 -9
  24. gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
  25. gr_libs/ml/utils/__init__.py +2 -9
  26. gr_libs/ml/utils/format.py +13 -90
  27. gr_libs/ml/utils/math.py +3 -2
  28. gr_libs/ml/utils/other.py +2 -2
  29. gr_libs/ml/utils/storage.py +41 -94
  30. gr_libs/odgr_executor.py +263 -0
  31. gr_libs/problems/consts.py +570 -292
  32. gr_libs/recognizer/{utils → _utils}/format.py +2 -2
  33. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +127 -36
  34. gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py} +11 -11
  35. gr_libs/recognizer/graml/graml_recognizer.py +186 -35
  36. gr_libs/recognizer/recognizer.py +59 -10
  37. gr_libs/tutorials/draco_panda_tutorial.py +58 -0
  38. gr_libs/tutorials/draco_parking_tutorial.py +56 -0
  39. {tutorials → gr_libs/tutorials}/gcdraco_panda_tutorial.py +11 -11
  40. {tutorials → gr_libs/tutorials}/gcdraco_parking_tutorial.py +6 -8
  41. {tutorials → gr_libs/tutorials}/graml_minigrid_tutorial.py +18 -14
  42. {tutorials → gr_libs/tutorials}/graml_panda_tutorial.py +11 -12
  43. {tutorials → gr_libs/tutorials}/graml_parking_tutorial.py +8 -10
  44. {tutorials → gr_libs/tutorials}/graml_point_maze_tutorial.py +17 -3
  45. {tutorials → gr_libs/tutorials}/graql_minigrid_tutorial.py +2 -2
  46. {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/METADATA +95 -29
  47. gr_libs-0.2.5.dist-info/RECORD +72 -0
  48. {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/WHEEL +1 -1
  49. gr_libs-0.2.5.dist-info/top_level.txt +2 -0
  50. tests/test_draco.py +14 -0
  51. tests/test_gcdraco.py +2 -2
  52. tests/test_graml.py +4 -4
  53. tests/test_graql.py +1 -1
  54. tests/test_odgr_executor_expertbasedgraml.py +14 -0
  55. tests/test_odgr_executor_gcdraco.py +14 -0
  56. tests/test_odgr_executor_gcgraml.py +14 -0
  57. tests/test_odgr_executor_graql.py +14 -0
  58. evaluation/analyze_results_cross_alg_cross_domain.py +0 -267
  59. evaluation/create_minigrid_map_image.py +0 -38
  60. evaluation/file_system.py +0 -53
  61. evaluation/generate_experiments_results.py +0 -141
  62. evaluation/generate_experiments_results_new_ver1.py +0 -238
  63. evaluation/generate_experiments_results_new_ver2.py +0 -331
  64. evaluation/generate_task_specific_statistics_plots.py +0 -500
  65. evaluation/get_plans_images.py +0 -62
  66. evaluation/increasing_and_decreasing_.py +0 -104
  67. gr_libs/ml/neural/utils/penv.py +0 -60
  68. gr_libs-0.1.8.dist-info/RECORD +0 -70
  69. gr_libs-0.1.8.dist-info/top_level.txt +0 -4
  70. /gr_libs/{environment/utils/__init__.py → _evaluation/_generate_experiments_results.py} +0 -0
  71. /gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +0 -0
  72. /gr_libs/ml/planner/mcts/{utils → _utils}/node.py +0 -0
  73. /gr_libs/recognizer/{utils → _utils}/__init__.py +0 -0
--- a/gr_libs/recognizer/utils/format.py
+++ b/gr_libs/recognizer/_utils/format.py
@@ -1,9 +1,9 @@
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
 from gr_libs.recognizer.graml.graml_recognizer import (
-    GCGraml,
     ExpertBasedGraml,
+    GCGraml,
     MCTSBasedGraml,
 )
-from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco
 
 
 def recognizer_str_to_obj(recognizer_str: str):
--- a/gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
+++ b/gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -1,14 +1,9 @@
-from abc import abstractmethod
 import os
+
 import dill
-from typing import List, Type, Callable
 import numpy as np
-from gr_libs.environment.environment import EnvProperty, GCEnvProperty
-from gr_libs.environment.utils.utils import domain_to_env_property
-from gr_libs.metrics.metrics import (
-    kl_divergence_norm_softmax,
-    mean_wasserstein_distance,
-)
+
+from gr_libs.metrics.metrics import kl_divergence_norm_softmax
 from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
@@ -22,11 +17,40 @@ from gr_libs.recognizer.recognizer import (
 
 
 class GRAsRL(Recognizer):
+    """
+    GRAsRL class represents a goal recognition framework that using reinforcement learning.
+    It inherits from the Recognizer class and implements the goal recognition process, including the
+    Goal adaptation and the inference phase. It trains agents for each new goal, which makes it impractical
+    for realtime environments where goals mmight change.
+
+    Attributes:
+        agents (dict): A dictionary that maps problem names to RLAgent instances.
+        active_goals (List[str]): A list of active goals.
+        active_problems (List[str]): A list of active problem names.
+        action_space (gym.Space): The action space of the RLAgent.
+
+    Methods:
+        goals_adaptation_phase: Performs the goals adaptation phase.
+        prepare_inf_sequence: Prepares the inference sequence for goal-directed problems.
+        inference_phase: Performs the inference phase and returns the recognized goal.
+        choose_agent: Returns the RLAgent for a given problem name.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.agents = {} # consider changing to ContextualAgent
 
-    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+    def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
+        """
+        Performs the goals adaptation phase.
+
+        Args:
+            dynamic_goals (List[str]): A list of dynamic goals.
+            dynamic_train_configs: The dynamic training configurations.
+
+        Returns:
+            None
+        """
         super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
         dynamic_goals_problems = [
             self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
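Note: the docstring added above documents the two phases a GRAsRL recognizer exposes. A minimal usage sketch of those phases, assuming the constructor keywords and goal strings used by the package's Minigrid tutorials (none of the concrete values below are taken from this diff):

    from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql

    # constructor keywords assumed from the tutorials, not shown in this diff
    recognizer = Graql(domain_name="minigrid", env_name="MiniGrid-SimpleCrossingS13N4")
    recognizer.goals_adaptation_phase(
        dynamic_goals=["(11,1)", "(11,11)", "(1,11)"],  # placeholder goal strings
        dynamic_train_configs=[None, None, None],       # placeholders; real configs come from the tutorials
    )
    # obs_sequence: a partial observation trace collected elsewhere
    predicted_goal = recognizer.inference_phase(obs_sequence, true_goal="(11,1)", percentage=0.5)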
@@ -48,18 +72,46 @@ class GRAsRL(Recognizer):
             self.agents[problem_name] = agent
         self.action_space = next(iter(self.agents.values())).env.action_space
 
+    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+        """
+        Prepares the inference sequence for goal-directed problems.
+
+        Args:
+            problem_name (str): The name of the problem.
+            inf_sequence: The inference sequence.
+
+        Returns:
+            The prepared inference sequence.
+        """
+        if not self.env_prop.use_goal_directed_problem():
+            for obs in inf_sequence:
+                obs[0]["desired_goal"] = np.array(
+                    [self.env_prop.str_to_goal(problem_name)],
+                    dtype=obs[0]["desired_goal"].dtype,
+                )
+            return inf_sequence
+        return inf_sequence
+
     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        """
+        Performs the inference phase and returns the recognized goal.
+
+        Args:
+            inf_sequence: The inference sequence.
+            true_goal: The true goal.
+            percentage: The percentage.
+
+        Returns:
+            The recognized goal as a string.
+        """
         scores = []
         for problem_name in self.active_problems:
             agent = self.choose_agent(problem_name)
             if self.env_prop.gc_adaptable():
-                assert (
-                    self.__class__.__name__ == "GCDraco"
-                ), "This recognizer is not compatible with goal conditioned problems."
                 inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
             score = self.evaluation_function(inf_sequence, agent, self.action_space)
             scores.append(score)
-        # scores = metrics.softmin(np.array(scores))
+
         if self.collect_statistics:
             results_path = get_gr_as_rl_experiment_confidence_path(
                 domain_name=self.env_prop.domain_name,
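The new prepare_inf_sequence helper assumes each step of inf_sequence is a pair whose first element is a goal-conditioned observation dict. A small illustration of the per-step rewrite it performs, with made-up shapes and values; only the "desired_goal" key comes from the diff, the other keys follow the usual gymnasium goal-env convention:

    import numpy as np

    # hypothetical (observation, action) step as produced by a goal-conditioned agent
    step = ({"observation": np.zeros(6), "achieved_goal": np.zeros(3), "desired_goal": np.zeros(3)}, None)
    inf_sequence = [step]
    # stands in for np.array([env_prop.str_to_goal(problem_name)], dtype=...)
    new_goal = np.array([[0.1, 0.2, 0.3]], dtype=step[0]["desired_goal"].dtype)
    for obs in inf_sequence:
        obs[0]["desired_goal"] = new_goal  # the same overwrite prepare_inf_sequence applies to every step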
@@ -82,10 +134,24 @@ class GRAsRL(Recognizer):
         return str(self.active_goals[true_goal_index])
 
     def choose_agent(self, problem_name: str) -> RLAgent:
+        """
+        Returns the RLAgent for a given problem name.
+
+        Args:
+            problem_name (str): The name of the problem.
+
+        Returns:
+            The RLAgent instance.
+        """
         return self.agents[problem_name]
 
 
 class Graql(GRAsRL, GaAgentTrainerRecognizer):
+    """
+    Graql extends the GRAsRL framework and GaAgentTrainerRecognizer, since it trains new agents for every new goal and it adheres
+    to the goal recognition as reinforcement learning framework. It uses a tabular Q-learning agent for discrete state and action spaces.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
@@ -93,12 +159,31 @@ class Graql(GRAsRL, GaAgentTrainerRecognizer):
             and self.env_prop.is_state_discrete()
             and self.env_prop.is_action_discrete()
         )
-        if self.rl_agent_type == None:
+        if self.rl_agent_type is None:
             self.rl_agent_type = TabularQLearner
         self.evaluation_function = kl_divergence_norm_softmax
 
 
 class Draco(GRAsRL, GaAgentTrainerRecognizer):
+    """
+    Draco class represents a recognizer agent trained using the GRAsRL framework.
+    Like Graql, it trains new agents for every new goal and adheres to the goal recognition as reinforcement learning framework.
+    It uses a deep reinforcement learning agent for continuous state and action spaces.
+
+    Args:
+        *args: Variable length argument list.
+        **kwargs: Arbitrary keyword arguments.
+
+    Attributes:
+        rl_agent_type (type): Type of the reinforcement learning agent.
+        evaluation_function (callable): Function used for evaluation.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Add any additional initialization code here
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
@@ -108,15 +193,25 @@ class Draco(GRAsRL, GaAgentTrainerRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = DeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
-        assert (
-            self.evaluation_function is None
-            or type(self.evaluation_function) != Callable
-        )
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
 
+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
+
+
+class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
+    """
+    GCDraco recognizer uses goal-conditioned reinforcement learning using the Draco algorithm.
+    It inherits from GRAsRL, LearningRecognizer, and GaAdaptingRecognizer.
+    It is designed for environments with continuous state and action spaces.
+    It uses a goal-conditioned deep reinforcement learning agent for training and inference, which
+    enables it to adapt to new goals during the goal adaptation phase without requiring retraining,
+    making it suitable for dynamic environments.
+    """
 
-class GCDraco(
-    GRAsRL, LearningRecognizer, GaAdaptingRecognizer
-):  # TODO problem: it gets 2 goal_adaptation phase from parents, one with configs and one without.
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
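Draco (and GCDraco below) now falls back to mean_wasserstein_distance when no evaluation_function is supplied and asserts that the value is callable. The call contract, as used in GRAsRL.inference_phase above, is evaluation_function(inf_sequence, agent, action_space); a sketch of supplying a custom one, where the scoring logic is a toy stand-in rather than a metric from the package:

    def sequence_score(inf_sequence, agent, action_space):
        # toy callable with the expected signature; a real metric would compare the observed
        # actions against the agent's policy over action_space and return a confidence value
        return float(len(inf_sequence))

    # passed at construction alongside the recognizer's usual keyword arguments:
    #   Draco(..., evaluation_function=sequence_score)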
@@ -127,12 +222,18 @@ class GCDraco(
         if self.rl_agent_type == None:
             self.rl_agent_type = GCDeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
-        assert (
-            self.evaluation_function is None
-            or type(self.evaluation_function) != Callable
-        )
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
 
-    def domain_learning_phase(self, base_goals: List[str], train_configs):
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
         super().domain_learning_phase(base_goals, train_configs)
         agent_kwargs = {
             "domain_name": self.env_prop.domain_name,
@@ -155,13 +256,3 @@ class GCDraco(
 
     def choose_agent(self, problem_name: str) -> RLAgent:
         return next(iter(self.agents.values()))
-
-    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
-        if not self.env_prop.use_goal_directed_problem():
-            for obs in inf_sequence:
-                obs[0]["desired_goal"] = np.array(
-                    [self.env_prop.str_to_goal(problem_name)],
-                    dtype=obs[0]["desired_goal"].dtype,
-                )
-            return inf_sequence
-        return inf_sequence
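GCDraco.domain_learning_phase now takes a single problems mapping and reads its "gc" entry instead of separate base_goals/train_configs arguments. A sketch of the expected shape, with placeholder goals and a placeholder train config (the package's real problem specifications appear to live in gr_libs/problems/consts.py):

    problems = {
        "gc": {
            "goals": ["(7,3)", "(3,7)", "(6,6)"],  # placeholder goal strings
            "train_configs": [None],               # placeholder; one config for the single goal-conditioned agent
        }
    }
    recognizer.domain_learning_phase(problems)     # recognizer: an already-constructed GCDraco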
--- a/gr_libs/recognizer/graml/gr_dataset.py
+++ b/gr_libs/recognizer/graml/_gr_dataset.py
@@ -1,16 +1,16 @@
-import numpy as np
-from torch.utils.data import Dataset
+import os
 import random
 from types import MethodType
-from typing import List
+
+import dill
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
 from gr_libs.environment.environment import EnvProperty
 from gr_libs.metrics.metrics import measure_average_sequence_distance
 from gr_libs.ml.base.rl_agent import ContextualAgent
 from gr_libs.ml.utils import get_siamese_dataset_path
-from gr_libs.ml.base import RLAgent
-import os
-import dill
-import torch
 
 
 class GRDataset(Dataset):
@@ -30,19 +30,19 @@ class GRDataset(Dataset):
 def check_diff_goals(first_agent_goal, second_agent_goal):
     try:
         assert first_agent_goal != second_agent_goal
-    except Exception as e:
+    except Exception:
         try:
             assert any(first_agent_goal != second_agent_goal)
-        except Exception as e:
+        except Exception:
             for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
                 assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))
 
 
 def generate_datasets(
     num_samples,
-    agents: List[ContextualAgent],
+    agents: list[ContextualAgent],
     observation_creation_method: MethodType,
-    problems: List[str],
+    problems: list[str],
     env_prop: EnvProperty,
     recognizer_name: str,
     gc_goal_set=None,
--- a/gr_libs/recognizer/graml/graml_recognizer.py
+++ b/gr_libs/recognizer/graml/graml_recognizer.py
@@ -1,39 +1,49 @@
-from abc import ABC, abstractmethod
-from collections import namedtuple
+""" Collection of recognizers that use GRAML methods: metric learning for ODGR. """
+
 import os
-from gr_libs.environment.environment import EnvProperty, GCEnvProperty, LSTMProperties
-from gr_libs.ml import utils
-from gr_libs.ml.base import ContextualAgent
-from typing import List, Tuple
+from abc import abstractmethod
+
+import dill
 import numpy as np
-from torch.utils.data import DataLoader
-from torch.nn.utils.rnn import pad_sequence
 import torch
+from torch.nn.utils.rnn import pad_sequence
+from torch.utils.data import DataLoader
+
+from gr_libs.environment.environment import EnvProperty
+from gr_libs.metrics import metrics
+from gr_libs.ml import utils
+from gr_libs.ml.base import ContextualAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.planner.mcts import mcts_model
-import dill
+from gr_libs.ml.sequential._lstm_model import LstmObservations, train_metric_model
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
-from gr_libs.recognizer.graml.gr_dataset import GRDataset, generate_datasets
-from gr_libs.ml.sequential.lstm_model import LstmObservations, train_metric_model
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.ml.utils.storage import (
     get_and_create,
-    get_lstm_model_dir,
     get_embeddings_result_path,
+    get_lstm_model_dir,
     get_policy_sequences_result_path,
 )
-from gr_libs.metrics import metrics
+from gr_libs.recognizer.graml._gr_dataset import GRDataset, generate_datasets
 from gr_libs.recognizer.recognizer import (
     GaAdaptingRecognizer,
     GaAgentTrainerRecognizer,
     LearningRecognizer,
-    Recognizer,
-) # import first, very dependent
+)
 
 ### TODO IMPLEMENT MORE SELECTION METHODS, MAKE SURE action_probs IS AS IT SEEMS: list of action-probability 'es ###
 
 
 def collate_fn(batch):
+    """
+    Collates a batch of data for training or evaluation.
+
+    Args:
+        batch (list): A list of tuples, where each tuple contains the first traces, second traces, and the label indicating whether the goals are the same.
+
+    Returns:
+        tuple: A tuple containing the padded first traces, padded second traces, labels, lengths of first traces, and lengths of second traces.
+    """
     first_traces, second_traces, is_same_goals = zip(*batch)
     # torch.stack takes tensor tuples (fixed size) and stacks them up in a matrix
     first_traces_padded = pad_sequence(
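The collate_fn docstring added above describes padding two sets of variable-length traces and returning their lengths. A self-contained sketch of that padding pattern with torch.nn.utils.rnn.pad_sequence (the tensor shapes are made up; the real traces come from GRDataset):

    import torch
    from torch.nn.utils.rnn import pad_sequence

    # two traces of different lengths, each step a 4-dimensional feature vector
    traces = [torch.zeros(5, 4), torch.zeros(3, 4)]
    lengths = torch.tensor([len(t) for t in traces])
    padded = pad_sequence(traces, batch_first=True)  # shape (2, 5, 4); the shorter trace is zero-padded
    assert padded.shape == (2, 5, 4) and lengths.tolist() == [5, 3]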
@@ -68,17 +78,52 @@ def save_weights(model: LstmObservations, path):
 
 
 class Graml(LearningRecognizer):
+    """
+    The Graml class is a subclass of LearningRecognizer and represents a recognizer that uses the Graml algorithm for goal recognition.
+    Graml learns a metric over observation sequences, over time: using a GC or a collection of agents, it creates a dataset and learns
+    the metric on it during the domain learning phase. During the goals adaptation phase, it creates or receives a library of sequences for each goal,
+    and maintains embeddings of them for the inference phase. The inference phase uses the learned metric to find the closest goal to a given sequence.
+
+    Attributes:
+        agents (list[ContextualAgent]): A list of contextual agents associated with the recognizer.
+        train_func: The function used for training the metric model.
+        collate_func: The function used for collating data in the training process.
+
+    Methods:
+        train_agents_on_base_goals(base_goals: list[str], train_configs: list): Trains the agents on the given base goals and train configurations.
+        domain_learning_phase(base_goals: list[str], train_configs: list): Performs the domain learning phase of the Graml algorithm.
+        goals_adaptation_phase(dynamic_goals: list[EnvProperty], save_fig=False): Performs the goals adaptation phase of the Graml algorithm.
+        get_goal_plan(goal): Retrieves the plan associated with the given goal.
+        dump_plans(true_sequence, true_goal, percentage): Dumps the plans to a file.
+        create_embeddings_dict(): Creates the embeddings dictionary for the plans.
+        inference_phase(inf_sequence, true_goal, percentage) -> str: Performs the inference phase of the Graml algorithm and returns the closest goal.
+        generate_sequences_library(goal: str, save_fig=False) -> list[list[tuple[np.ndarray, np.ndarray]]]: Generates the sequences library for the given goal.
+        update_sequences_library_inference_phase(inf_sequence) -> list[list[tuple[np.ndarray, np.ndarray]]]: Updates the sequences library during the inference phase.
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GramlRecognizer object.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        Attributes:
+            agents (list[ContextualAgent]): List of contextual agents.
+            train_func: Training function for the metric model.
+            collate_func: Collate function for data batching.
+        """
         super().__init__(*args, **kwargs)
-        self.agents: List[ContextualAgent] = []
+        self.agents: list[ContextualAgent] = []
         self.train_func = train_metric_model
         self.collate_func = collate_fn
 
     @abstractmethod
-    def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+    def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
         pass
 
-    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
         super().domain_learning_phase(base_goals, train_configs)
         self.train_agents_on_base_goals(base_goals, train_configs)
         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
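The Graml docstring above summarizes the pipeline: embed observation sequences with the learned metric model, then pick the goal whose stored embedding is closest. A toy illustration of that selection step only, using plain Euclidean distance on made-up embeddings (the package uses the learned LSTM metric and its per-goal plan library instead):

    import numpy as np

    goal_embeddings = {"(7,3)": np.array([0.9, 0.1]), "(3,7)": np.array([0.1, 0.8])}  # placeholders
    observed = np.array([0.85, 0.2])  # embedding of the observed (partial) sequence
    closest_goal = min(goal_embeddings, key=lambda g: np.linalg.norm(goal_embeddings[g] - observed))
    # closest_goal == "(7,3)": the goal whose sequence embedding is nearest to the observation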
@@ -130,7 +175,7 @@ class Graml(LearningRecognizer):
         )
         save_weights(model=self.model, path=self.model_file_path)
 
-    def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty], save_fig=False):
+    def goals_adaptation_phase(self, dynamic_goals: list[EnvProperty], save_fig=False):
         self.is_first_inf_since_new_goals = True
         self.current_goals = dynamic_goals
         # start by training each rl agent on the base goal set
@@ -245,13 +290,13 @@ class Graml(LearningRecognizer):
     @abstractmethod
     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
         pass
 
     # this function duplicates every sequence and creates a consecutive and non-consecutive version of it
     def update_sequences_library_inference_phase(
         self, inf_sequence
-    ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
         new_plans_dict = {}
         for goal, obss in self.plans_dict.items():
             new_obss = []
281
326
 
282
327
 
283
328
  class BGGraml(Graml):
329
+ """
330
+ BGGraml class represents a goal-directed agent for the BGGraml algorithm.
331
+
332
+ It extends the Graml class and provides additional methods for training agents on base goals.
333
+ """
334
+
284
335
  def __init__(self, *args, **kwargs):
285
336
  super().__init__(*args, **kwargs)
286
337
 
287
- def domain_learning_phase(self, base_goals: List[str], train_configs: List):
288
- assert len(train_configs) == len(
289
- base_goals
290
- ), "There should be train configs for every goal in BGGraml."
291
- return super().domain_learning_phase(base_goals, train_configs)
338
+ def domain_learning_phase(self, problems):
339
+ # Always use 'bg' for BGGraml
340
+ base = problems["bg"]
341
+ base_goals = base["goals"]
342
+ train_configs = base["train_configs"]
343
+ assert len(base_goals) == len(
344
+ train_configs
345
+ ), "base_goals and train_configs should have the same length"
346
+ super().domain_learning_phase(base_goals, train_configs)
292
347
 
293
348
  # In case we need goal-directed agent for every goal
294
- def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
349
+ def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
295
350
  self.original_problems = [
296
351
  self.env_prop.goal_to_problem_str(g) for g in base_goals
297
352
  ]
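BGGraml.domain_learning_phase mirrors the GCDraco change but reads the "bg" entry and requires one train config per base goal, matching the assert added above. A sketch of the expected shape with placeholder values:

    problems = {
        "bg": {
            "goals": ["(11,1)", "(11,11)", "(1,11)"],  # placeholder goal strings
            "train_configs": [None, None, None],       # same length as goals, as the assert requires
        }
    }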
@@ -316,14 +371,40 @@ class BGGraml(Graml):
 
 
 class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
+    """
+    MCTSBasedGraml is a class that represents a recognizer based on the MCTS algorithm.
+    It inherits from BGGraml and GaAdaptingRecognizer classes.
+
+    Attributes:
+        rl_agent_type (type): The type of reinforcement learning agent used.
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GramlRecognizer object.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        """
         super().__init__(*args, **kwargs)
         if self.rl_agent_type == None:
             self.rl_agent_type = TabularQLearner
 
     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
+        """
+        Generates a library of sequences for a given goal.
+
+        Args:
+            goal (str): The goal for which to generate sequences.
+            save_fig (bool, optional): Whether to save the generated figure. Defaults to False.
+
+        Returns:
+            list[list[tuple[np.ndarray, np.ndarray]]]: The generated sequences library.
+        """
         problem_name = self.env_prop.goal_to_problem_str(goal)
         img_path = os.path.join(
             get_policy_sequences_result_path(
@@ -342,7 +423,29 @@ class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
 
 
 class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
+    """
+    ExpertBasedGraml class represents a Graml recognizer that uses expert knowledge to generate sequences library and adapt goals.
+
+    Args:
+        *args: Variable length argument list.
+        **kwargs: Arbitrary keyword arguments.
+
+    Attributes:
+        rl_agent_type (type): The type of reinforcement learning agent used.
+        env_prop (EnvironmentProperties): The environment properties.
+        dynamic_train_configs_dict (dict): The dynamic training configurations for each problem.
+
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GRAML Recognizer.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        """
         super().__init__(*args, **kwargs)
         if self.rl_agent_type == None:
             if self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete():
352
455
 
353
456
  def generate_sequences_library(
354
457
  self, goal: str, save_fig=False
355
- ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
458
+ ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
459
+ """
460
+ Generates a sequences library for a given goal.
461
+
462
+ Args:
463
+ goal (str): The goal for which to generate the sequences library.
464
+ save_fig (bool, optional): Whether to save the figure. Defaults to False.
465
+
466
+ Returns:
467
+ list[list[tuple[np.ndarray, np.ndarray]]]: The generated sequences library.
468
+
469
+ """
356
470
  problem_name = self.env_prop.goal_to_problem_str(goal)
357
471
  kwargs = {
358
472
  "domain_name": self.domain_name,
@@ -377,7 +491,18 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
377
491
  agent_kwargs["fig_path"] = fig_path
378
492
  return [agent.generate_observation(**agent_kwargs)]
379
493
 
380
- def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
494
+ def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
495
+ """
496
+ Performs the goals adaptation phase.
497
+
498
+ Args:
499
+ dynamic_goals (list[str]): The dynamic goals.
500
+ dynamic_train_configs: The dynamic training configurations.
501
+
502
+ Returns:
503
+ The result of the goals adaptation phase.
504
+
505
+ """
381
506
  self.dynamic_goals_problems = [
382
507
  self.env_prop.goal_to_problem_str(g) for g in dynamic_goals
383
508
  ]
@@ -391,6 +516,28 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
391
516
 
392
517
 
393
518
  class GCGraml(Graml, GaAdaptingRecognizer):
519
+ """
520
+ GCGraml class represents a recognizer that uses the GCDeepRLAgent for domain learning and sequence generation.
521
+ It makes its adaptation phase quicker and require less assumptions, but the assumption of a GC agent is still needed and may result
522
+ in less optimal policies that generate the observations in the synthetic dataset, which could eventually lead to a less optimal metric.
523
+
524
+ Args:
525
+ Graml (class): Base class for Graml recognizers.
526
+ GaAdaptingRecognizer (class): Base class for GA adapting recognizers.
527
+
528
+ Attributes:
529
+ rl_agent_type (class): The type of RL agent to be used for learning and generation.
530
+ env_prop (object): The environment properties.
531
+ agents (list): List of contextual agents.
532
+
533
+ Methods:
534
+ __init__: Initializes the GCGraml recognizer.
535
+ domain_learning_phase: Performs the domain learning phase.
536
+ train_agents_on_base_goals: Trains the RL agents on the base goals.
537
+ generate_sequences_library: Generates sequences library for a specific goal.
538
+
539
+ """
540
+
394
541
  def __init__(self, *args, **kwargs):
395
542
  super().__init__(*args, **kwargs)
396
543
  if self.rl_agent_type == None:
@@ -401,14 +548,18 @@ class GCGraml(Graml, GaAdaptingRecognizer):
401
548
  and not self.env_prop.is_action_discrete()
402
549
  )
403
550
 
404
- def domain_learning_phase(self, base_goals: List[str], train_configs: List):
551
+ def domain_learning_phase(self, problems):
552
+ # Always use 'gc' for GCGraml
553
+ base = problems["gc"]
554
+ base_goals = base["goals"]
555
+ train_configs = base["train_configs"]
405
556
  assert (
406
557
  len(train_configs) == 1
407
- ), "There should be one train config for the sole gc agent in GCGraml."
408
- return super().domain_learning_phase(base_goals, train_configs)
558
+ ), "GCGraml should only have one train config for the base goals, it uses a single agent"
559
+ super().domain_learning_phase(base_goals, train_configs)
409
560
 
410
561
  # In case we need goal-directed agent for every goal
411
- def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
562
+ def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
412
563
  self.gc_goal_set = base_goals
413
564
  self.original_problems = self.env_prop.name # needed for gr_dataset
414
565
  # start by training each rl agent on the base goal set
@@ -432,7 +583,7 @@ class GCGraml(Graml, GaAdaptingRecognizer):
432
583
 
433
584
  def generate_sequences_library(
434
585
  self, goal: str, save_fig=False
435
- ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
586
+ ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
436
587
  problem_name = self.env_prop.goal_to_problem_str(goal)
437
588
  kwargs = {
438
589
  "domain_name": self.domain_name,