gr-libs 0.1.7.post0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (86)
  1. gr_libs/__init__.py +4 -1
  2. gr_libs/_evaluation/__init__.py +1 -0
  3. gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +260 -0
  4. gr_libs/_evaluation/_generate_experiments_results.py +141 -0
  5. gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +497 -0
  6. gr_libs/_evaluation/_get_plans_images.py +61 -0
  7. gr_libs/_evaluation/_increasing_and_decreasing_.py +106 -0
  8. gr_libs/_version.py +2 -2
  9. gr_libs/all_experiments.py +294 -0
  10. gr_libs/environment/__init__.py +30 -9
  11. gr_libs/environment/_utils/utils.py +27 -0
  12. gr_libs/environment/environment.py +417 -54
  13. gr_libs/metrics/__init__.py +7 -0
  14. gr_libs/metrics/metrics.py +231 -54
  15. gr_libs/ml/__init__.py +2 -5
  16. gr_libs/ml/agent.py +21 -6
  17. gr_libs/ml/base/__init__.py +3 -1
  18. gr_libs/ml/base/rl_agent.py +81 -13
  19. gr_libs/ml/consts.py +1 -1
  20. gr_libs/ml/neural/__init__.py +1 -3
  21. gr_libs/ml/neural/deep_rl_learner.py +619 -378
  22. gr_libs/ml/neural/utils/__init__.py +1 -2
  23. gr_libs/ml/neural/utils/dictlist.py +3 -3
  24. gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +1 -1
  25. gr_libs/ml/planner/mcts/{utils → _utils}/node.py +11 -7
  26. gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +15 -11
  27. gr_libs/ml/planner/mcts/mcts_model.py +571 -312
  28. gr_libs/ml/sequential/__init__.py +0 -1
  29. gr_libs/ml/sequential/_lstm_model.py +270 -0
  30. gr_libs/ml/tabular/__init__.py +1 -3
  31. gr_libs/ml/tabular/state.py +7 -7
  32. gr_libs/ml/tabular/tabular_q_learner.py +150 -82
  33. gr_libs/ml/tabular/tabular_rl_agent.py +42 -28
  34. gr_libs/ml/utils/__init__.py +2 -3
  35. gr_libs/ml/utils/format.py +28 -97
  36. gr_libs/ml/utils/math.py +5 -3
  37. gr_libs/ml/utils/other.py +3 -3
  38. gr_libs/ml/utils/storage.py +88 -81
  39. gr_libs/odgr_executor.py +268 -0
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/_utils/__init__.py +0 -0
  42. gr_libs/recognizer/_utils/format.py +18 -0
  43. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +233 -88
  44. gr_libs/recognizer/graml/_gr_dataset.py +233 -0
  45. gr_libs/recognizer/graml/graml_recognizer.py +586 -252
  46. gr_libs/recognizer/recognizer.py +90 -30
  47. gr_libs/tutorials/draco_panda_tutorial.py +58 -0
  48. gr_libs/tutorials/draco_parking_tutorial.py +56 -0
  49. gr_libs/tutorials/gcdraco_panda_tutorial.py +62 -0
  50. gr_libs/tutorials/gcdraco_parking_tutorial.py +57 -0
  51. gr_libs/tutorials/graml_minigrid_tutorial.py +64 -0
  52. gr_libs/tutorials/graml_panda_tutorial.py +57 -0
  53. gr_libs/tutorials/graml_parking_tutorial.py +52 -0
  54. gr_libs/tutorials/graml_point_maze_tutorial.py +60 -0
  55. gr_libs/tutorials/graql_minigrid_tutorial.py +50 -0
  56. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
  57. gr_libs-0.2.2.dist-info/RECORD +71 -0
  58. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
  59. gr_libs-0.2.2.dist-info/top_level.txt +2 -0
  60. tests/test_draco.py +14 -0
  61. tests/test_gcdraco.py +10 -0
  62. tests/test_graml.py +12 -8
  63. tests/test_graql.py +3 -2
  64. evaluation/analyze_results_cross_alg_cross_domain.py +0 -277
  65. evaluation/create_minigrid_map_image.py +0 -34
  66. evaluation/file_system.py +0 -42
  67. evaluation/generate_experiments_results.py +0 -92
  68. evaluation/generate_experiments_results_new_ver1.py +0 -254
  69. evaluation/generate_experiments_results_new_ver2.py +0 -331
  70. evaluation/generate_task_specific_statistics_plots.py +0 -272
  71. evaluation/get_plans_images.py +0 -47
  72. evaluation/increasing_and_decreasing_.py +0 -63
  73. gr_libs/environment/utils/utils.py +0 -17
  74. gr_libs/ml/neural/utils/penv.py +0 -57
  75. gr_libs/ml/sequential/lstm_model.py +0 -192
  76. gr_libs/recognizer/graml/gr_dataset.py +0 -134
  77. gr_libs/recognizer/utils/__init__.py +0 -1
  78. gr_libs/recognizer/utils/format.py +0 -13
  79. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  80. gr_libs-0.1.7.post0.dist-info/top_level.txt +0 -4
  81. tutorials/graml_minigrid_tutorial.py +0 -34
  82. tutorials/graml_panda_tutorial.py +0 -41
  83. tutorials/graml_parking_tutorial.py +0 -39
  84. tutorials/graml_point_maze_tutorial.py +0 -39
  85. tutorials/graql_minigrid_tutorial.py +0 -34
  86. /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
File without changes: the last entry is a rename only. The sketch below illustrates the new import paths implied by these moves.
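The broader pattern in this list is a reorganization: the standalone evaluation/ and tutorials/ directories move inside the gr_libs package, and several helper modules gain an underscore prefix (utils → _utils, lstm_model.py → _lstm_model.py, gr_dataset.py → _gr_dataset.py). A minimal sketch of what that means for import paths, assuming only the new module locations shown in the file list (the functions defined inside those modules are not part of this diff):

# Hedged sketch: module paths as implied by the file list above. find_spec only locates the
# modules, so nothing inside the tutorials is executed here.
from importlib.util import find_spec

assert find_spec("gr_libs.tutorials.graml_minigrid_tutorial") is not None  # was tutorials/graml_minigrid_tutorial.py
assert find_spec("gr_libs.recognizer._utils.format") is not None           # was gr_libs/recognizer/utils/format.py
assert find_spec("gr_libs.odgr_executor") is not None                      # new module in 0.2.2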
gr_libs/recognizer/_utils/format.py (new file)
@@ -0,0 +1,18 @@
+ from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
+ from gr_libs.recognizer.graml.graml_recognizer import (
+     ExpertBasedGraml,
+     GCGraml,
+     MCTSBasedGraml,
+ )
+
+
+ def recognizer_str_to_obj(recognizer_str: str):
+     recognizer_map = {
+         "GCGraml": GCGraml,
+         "ExpertBasedGraml": ExpertBasedGraml,
+         "MCTSBasedGraml": MCTSBasedGraml,
+         "Graql": Graql,
+         "Draco": Draco,
+         "GCDraco": GCDraco,
+     }
+     return recognizer_map.get(recognizer_str)
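This new helper gives a single place to resolve recognizer names (for example from a CLI argument or experiment config) to their classes. A minimal usage sketch, assuming only the code in the hunk above:

from gr_libs.recognizer._utils.format import recognizer_str_to_obj
from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco

# The mapping mirrors the dictionary in the new module; unknown names fall back to None
# because the helper uses dict.get rather than raising.
assert recognizer_str_to_obj("Draco") is Draco
assert recognizer_str_to_obj("GCDraco") is GCDraco
assert recognizer_str_to_obj("NotARecognizer") is None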
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -1,102 +1,247 @@
- from abc import abstractmethod
  import os
+
  import dill
- from typing import List, Type
  import numpy as np
- from gr_libs.environment.environment import EnvProperty, GCEnvProperty
- from gr_libs.environment.utils.utils import domain_to_env_property
- from gr_libs.metrics.metrics import kl_divergence_norm_softmax, mean_wasserstein_distance
+
+ from gr_libs.metrics.metrics import kl_divergence_norm_softmax
  from gr_libs.ml.base import RLAgent
  from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
  from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
  from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
- from gr_libs.recognizer.recognizer import GaAdaptingRecognizer, GaAgentTrainerRecognizer, LearningRecognizer, Recognizer
+ from gr_libs.recognizer.recognizer import (
+     GaAdaptingRecognizer,
+     GaAgentTrainerRecognizer,
+     LearningRecognizer,
+     Recognizer,
+ )
+

  class GRAsRL(Recognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         self.agents = {} # consider changing to ContextualAgent
-
-     def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
-         super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
-         dynamic_goals_problems = [self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals]
-         self.active_goals = dynamic_goals
-         self.active_problems = dynamic_goals_problems
-         for problem_name, config in zip(dynamic_goals_problems, dynamic_train_configs):
-             agent_kwargs = {"domain_name": self.env_prop.domain_name,
-                             "problem_name": problem_name}
-             if config[0]: agent_kwargs["algorithm"] = config[0]
-             if config[1]: agent_kwargs["num_timesteps"] = config[1]
-             agent = self.rl_agent_type(**agent_kwargs)
-             agent.learn()
-             self.agents[problem_name] = agent
-         self.action_space = next(iter(self.agents.values())).env.action_space
-
-     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
-         scores = []
-         for problem_name in self.active_problems:
-             agent = self.choose_agent(problem_name)
-             if self.env_prop.gc_adaptable():
-                 assert self.__class__.__name__ == "GCDraco", "This recognizer is not compatible with goal conditioned problems."
-                 inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
-             score = self.evaluation_function(inf_sequence, agent, self.action_space)
-             scores.append(score)
-         #scores = metrics.softmin(np.array(scores))
-         if self.collect_statistics:
-             results_path = get_gr_as_rl_experiment_confidence_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__)
-             if not os.path.exists(results_path): os.makedirs(results_path)
-             with open(results_path + f'/true_{true_goal}_{percentage}_scores.pkl', 'wb') as scores_file:
-                 dill.dump([(str(goal), score) for (goal, score) in zip(self.active_goals, scores)], scores_file)
-         div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
-         return str(self.active_goals[true_goal_index])
-
-     def choose_agent(self, problem_name:str) -> RLAgent:
-         return self.agents[problem_name]
+     """
+     GRAsRL class represents a goal recognition framework that using reinforcement learning.
+     It inherits from the Recognizer class and implements the goal recognition process, including the
+     Goal adaptation and the inference phase. It trains agents for each new goal, which makes it impractical
+     for realtime environments where goals mmight change.
+
+     Attributes:
+         agents (dict): A dictionary that maps problem names to RLAgent instances.
+         active_goals (List[str]): A list of active goals.
+         active_problems (List[str]): A list of active problem names.
+         action_space (gym.Space): The action space of the RLAgent.
+
+     Methods:
+         goals_adaptation_phase: Performs the goals adaptation phase.
+         prepare_inf_sequence: Prepares the inference sequence for goal-directed problems.
+         inference_phase: Performs the inference phase and returns the recognized goal.
+         choose_agent: Returns the RLAgent for a given problem name.
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.agents = {}  # consider changing to ContextualAgent
+
+     def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
+         """
+         Performs the goals adaptation phase.
+
+         Args:
+             dynamic_goals (List[str]): A list of dynamic goals.
+             dynamic_train_configs: The dynamic training configurations.
+
+         Returns:
+             None
+         """
+         super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
+         dynamic_goals_problems = [
+             self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+         ]
+         self.active_goals = dynamic_goals
+         self.active_problems = dynamic_goals_problems
+         for problem_name, config in zip(dynamic_goals_problems, dynamic_train_configs):
+             agent_kwargs = {
+                 "domain_name": self.env_prop.domain_name,
+                 "problem_name": problem_name,
+                 "env_prop": self.env_prop,
+             }
+             if config[0]:
+                 agent_kwargs["algorithm"] = config[0]
+             if config[1]:
+                 agent_kwargs["num_timesteps"] = config[1]
+             agent = self.rl_agent_type(**agent_kwargs)
+             agent.learn()
+             self.agents[problem_name] = agent
+         self.action_space = next(iter(self.agents.values())).env.action_space
+
+     def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+         """
+         Prepares the inference sequence for goal-directed problems.
+
+         Args:
+             problem_name (str): The name of the problem.
+             inf_sequence: The inference sequence.
+
+         Returns:
+             The prepared inference sequence.
+         """
+         if not self.env_prop.use_goal_directed_problem():
+             for obs in inf_sequence:
+                 obs[0]["desired_goal"] = np.array(
+                     [self.env_prop.str_to_goal(problem_name)],
+                     dtype=obs[0]["desired_goal"].dtype,
+                 )
+             return inf_sequence
+         return inf_sequence
+
+     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+         """
+         Performs the inference phase and returns the recognized goal.
+
+         Args:
+             inf_sequence: The inference sequence.
+             true_goal: The true goal.
+             percentage: The percentage.
+
+         Returns:
+             The recognized goal as a string.
+         """
+         scores = []
+         for problem_name in self.active_problems:
+             agent = self.choose_agent(problem_name)
+             if self.env_prop.gc_adaptable():
+                 inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
+             score = self.evaluation_function(inf_sequence, agent, self.action_space)
+             scores.append(score)
+
+         if self.collect_statistics:
+             results_path = get_gr_as_rl_experiment_confidence_path(
+                 domain_name=self.env_prop.domain_name,
+                 env_name=self.env_prop.name,
+                 recognizer=self.__class__.__name__,
+             )
+             if not os.path.exists(results_path):
+                 os.makedirs(results_path)
+             with open(
+                 results_path + f"/true_{true_goal}_{percentage}_scores.pkl", "wb"
+             ) as scores_file:
+                 dill.dump(
+                     [
+                         (str(goal), score)
+                         for (goal, score) in zip(self.active_goals, scores)
+                     ],
+                     scores_file,
+                 )
+         div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
+         return str(self.active_goals[true_goal_index])
+
+     def choose_agent(self, problem_name: str) -> RLAgent:
+         """
+         Returns the RLAgent for a given problem name.
+
+         Args:
+             problem_name (str): The name of the problem.
+
+         Returns:
+             The RLAgent instance.
+         """
+         return self.agents[problem_name]


  class Graql(GRAsRL, GaAgentTrainerRecognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         assert not self.env_prop.gc_adaptable() and self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete()
-         if self.rl_agent_type==None: self.rl_agent_type = TabularQLearner
-         self.evaluation_function = kl_divergence_norm_softmax
+     """
+     Graql extends the GRAsRL framework and GaAgentTrainerRecognizer, since it trains new agents for every new goal and it adheres
+     to the goal recognition as reinforcement learning framework. It uses a tabular Q-learning agent for discrete state and action spaces.
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         assert (
+             not self.env_prop.gc_adaptable()
+             and self.env_prop.is_state_discrete()
+             and self.env_prop.is_action_discrete()
+         )
+         if self.rl_agent_type is None:
+             self.rl_agent_type = TabularQLearner
+         self.evaluation_function = kl_divergence_norm_softmax
+

  class Draco(GRAsRL, GaAgentTrainerRecognizer):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         assert not self.env_prop.is_state_discrete() and not self.env_prop.is_action_discrete()
-         if self.rl_agent_type==None: self.rl_agent_type = DeepRLAgent
-         self.evaluation_function = mean_wasserstein_distance
-
- class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer): # TODO problem: it gets 2 goal_adaptation phase from parents, one with configs and one without.
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         assert self.env_prop.gc_adaptable() and not self.env_prop.is_state_discrete() and not self.env_prop.is_action_discrete()
-         self.evaluation_function = mean_wasserstein_distance
-         if self.rl_agent_type==None: self.rl_agent_type = GCDeepRLAgent
-
-     def domain_learning_phase(self, base_goals: List[str], train_configs):
-         super().domain_learning_phase(base_goals, train_configs)
-         agent_kwargs = {"domain_name": self.env_prop.domain_name,
-                         "problem_name": self.env_prop.name,
-                         "algorithm": self.original_train_configs[0][0],
-                         "num_timesteps": self.original_train_configs[0][1]}
-         agent = self.rl_agent_type(**agent_kwargs)
-         agent.learn()
-         self.agents[self.env_prop.name] = agent
-         self.action_space = agent.env.action_space
-
-     # this method currently does nothing but optimizations can be made here.
-     def goals_adaptation_phase(self, dynamic_goals):
-         self.active_goals = dynamic_goals
-         self.active_problems = [self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals]
-
-     def choose_agent(self, problem_name:str) -> RLAgent:
-         return next(iter(self.agents.values()))
-
-     def prepare_inf_sequence(self, problem_name: str, inf_sequence):
-         if not self.env_prop.use_goal_directed_problem():
-             for obs in inf_sequence:
-                 obs[0]['desired_goal'] = np.array([self.env_prop.str_to_goal(problem_name)], dtype=obs[0]['desired_goal'].dtype)
-             return inf_sequence
-         return inf_sequence
+     """
+     Draco class represents a recognizer agent trained using the GRAsRL framework.
+     Like Graql, it trains new agents for every new goal and adheres to the goal recognition as reinforcement learning framework.
+     It uses a deep reinforcement learning agent for continuous state and action spaces.
+
+     Args:
+         *args: Variable length argument list.
+         **kwargs: Arbitrary keyword arguments.
+
+     Attributes:
+         rl_agent_type (type): Type of the reinforcement learning agent.
+         evaluation_function (callable): Function used for evaluation.
+
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         # Add any additional initialization code here
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         assert (
+             not self.env_prop.is_state_discrete()
+             and not self.env_prop.is_action_discrete()
+         )
+         if self.rl_agent_type == None:
+             self.rl_agent_type = DeepRLAgent
+         self.evaluation_function = kwargs.get("evaluation_function")
+         assert callable(
+             self.evaluation_function
+         ), "Evaluation function must be a callable function."
+
+
+ class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
+     """
+     GCDraco recognizer uses goal-conditioned reinforcement learning using the Draco algorithm.
+     It inherits from GRAsRL, LearningRecognizer, and GaAdaptingRecognizer.
+     It is designed for environments with continuous state and action spaces.
+     It uses a goal-conditioned deep reinforcement learning agent for training and inference, which
+     enables it to adapt to new goals during the goal adaptation phase without requiring retraining,
+     making it suitable for dynamic environments.
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         assert (
+             self.env_prop.gc_adaptable()
+             and not self.env_prop.is_state_discrete()
+             and not self.env_prop.is_action_discrete()
+         )
+         if self.rl_agent_type == None:
+             self.rl_agent_type = GCDeepRLAgent
+         self.evaluation_function = kwargs.get("evaluation_function")
+         assert callable(
+             self.evaluation_function
+         ), "Evaluation function must be a callable function."
+
+     def domain_learning_phase(self, base_goals: list[str], train_configs):
+         super().domain_learning_phase(base_goals, train_configs)
+         agent_kwargs = {
+             "domain_name": self.env_prop.domain_name,
+             "problem_name": self.env_prop.name,
+             "algorithm": self.original_train_configs[0][0],
+             "num_timesteps": self.original_train_configs[0][1],
+             "env_prop": self.env_prop,
+         }
+         agent = self.rl_agent_type(**agent_kwargs)
+         agent.learn()
+         self.agents[self.env_prop.name] = agent
+         self.action_space = agent.env.action_space
+
+     # this method currently does nothing but optimizations can be made here.
+     def goals_adaptation_phase(self, dynamic_goals):
+         self.active_goals = dynamic_goals
+         self.active_problems = [
+             self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+         ]
+
+     def choose_agent(self, problem_name: str) -> RLAgent:
+         return next(iter(self.agents.values()))
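Two behavioural changes stand out in this hunk: Draco and GCDraco no longer hard-code mean_wasserstein_distance and instead read an evaluation_function kwarg (asserted to be callable), and the goal-selection line in GRAsRL.inference_phase is unchanged but easy to misread because of its variable names. The self-contained sketch below replays that selection step with made-up scores to show that the goal with the smallest divergence wins; the score values and goal labels are illustrative only.

# Illustrative replay of the selection line in GRAsRL.inference_phase (values are made up).
active_goals = ["(1, 1)", "(3, 4)", "(5, 2)"]
scores = [0.42, 0.17, 0.93]  # one divergence score per active goal (lower = better match)

# enumerate yields (index, score); the generator flips each pair to (score, index) so min()
# orders by score first and returns the index of the best-matching goal.
div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
assert (div, true_goal_index) == (0.17, 1)
print(active_goals[true_goal_index])  # -> "(3, 4)"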
gr_libs/recognizer/graml/_gr_dataset.py (new file)
@@ -0,0 +1,233 @@
+ import os
+ import random
+ from types import MethodType
+
+ import dill
+ import numpy as np
+ import torch
+ from torch.utils.data import Dataset
+
+ from gr_libs.environment.environment import EnvProperty
+ from gr_libs.metrics.metrics import measure_average_sequence_distance
+ from gr_libs.ml.base.rl_agent import ContextualAgent
+ from gr_libs.ml.utils import get_siamese_dataset_path
+
+
+ class GRDataset(Dataset):
+     def __init__(self, num_samples, samples):
+         self.num_samples = num_samples
+         self.samples = samples
+
+     def __len__(self):
+         return self.num_samples
+
+     def __getitem__(self, idx):
+         return self.samples[
+             idx
+         ]  # returns a tuple - as appended in 'generate_dataset' last line
+
+
+ def check_diff_goals(first_agent_goal, second_agent_goal):
+     try:
+         assert first_agent_goal != second_agent_goal
+     except Exception:
+         try:
+             assert any(first_agent_goal != second_agent_goal)
+         except Exception:
+             for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
+                 assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))
+
+
+ def generate_datasets(
+     num_samples,
+     agents: list[ContextualAgent],
+     observation_creation_method: MethodType,
+     problems: list[str],
+     env_prop: EnvProperty,
+     recognizer_name: str,
+     gc_goal_set=None,
+ ):
+     if gc_goal_set:
+         model_name = env_prop.name
+     else:
+         model_name = env_prop.problem_list_to_str_tuple(problems)
+     dataset_directory = get_siamese_dataset_path(
+         domain_name=env_prop.domain_name,
+         env_name=env_prop.name,
+         model_name=model_name,
+         recognizer=recognizer_name,
+     )
+     dataset_train_path, dataset_dev_path = os.path.join(
+         dataset_directory, "train.pkl"
+     ), os.path.join(dataset_directory, "dev.pkl")
+     if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
+         print(f"Loading pre-existing datasets in {dataset_directory}")
+         with open(dataset_train_path, "rb") as train_file:
+             train_samples = dill.load(train_file)
+         with open(dataset_dev_path, "rb") as dev_file:
+             dev_samples = dill.load(dev_file)
+     else:
+         print(f"{dataset_directory} doesn't exist, generating datasets")
+         if not os.path.exists(dataset_directory):
+             os.makedirs(dataset_directory)
+         all_samples = []
+         for i in range(num_samples):
+             if (
+                 gc_goal_set != None
+             ):  # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
+                 assert (
+                     env_prop.gc_adaptable() == True
+                 ), "shouldn't specify a goal directed representation if not generating datasets with a general agent."
+                 is_same_goal = (
+                     np.random.choice(
+                         [1, 0],
+                         1,
+                         p=[
+                             1 / max(len(gc_goal_set), 6),
+                             1 - 1 / max(len(gc_goal_set), 6),
+                         ],
+                     )
+                 )[0]
+                 first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                     0
+                 ]
+                 first_random_index = np.random.randint(
+                     0, len(gc_goal_set)
+                 )  # works for lists of every object type, while np.choice only works for 1d arrays
+                 first_agent_goal = gc_goal_set[
+                     first_random_index
+                 ]  # could be either a real goal or a goal-directed problem name
+                 # first_agent_goal = np.random.choice(gc_goal_set)
+                 first_trace_percentage = random.choice(
+                     [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                 )
+                 first_observation = []
+                 first_agent_kwargs = {
+                     "action_selection_method": observation_creation_method,
+                     "percentage": first_trace_percentage,
+                     "is_consecutive": first_is_consecutive,
+                     "save_fig": False,
+                 }
+                 while first_observation == []:
+                     # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
+                     # but the problem is with the panda because it
+                     if env_prop.use_goal_directed_problem():
+                         first_agent_kwargs["goal_directed_problem"] = first_agent_goal
+                     else:
+                         first_agent_kwargs["goal_directed_goal"] = first_agent_goal
+                     first_observation = agents[0].agent.generate_partial_observation(
+                         **first_agent_kwargs
+                     )
+                 first_observation = agents[0].agent.simplify_observation(
+                     first_observation
+                 )
+
+                 second_is_consecutive = np.random.choice(
+                     [True, False], 1, p=[0.5, 0.5]
+                 )[0]
+                 second_agent_goal = first_agent_goal
+                 second_random_index = first_random_index
+                 if not is_same_goal:
+                     second_random_index = np.random.choice(
+                         [i for i in range(len(gc_goal_set)) if i != first_random_index]
+                     )
+                     assert first_random_index != second_random_index
+                 second_agent_goal = gc_goal_set[second_random_index]
+                 if not is_same_goal:
+                     check_diff_goals(first_agent_goal, second_agent_goal)
+                 second_trace_percentage = first_trace_percentage
+                 second_observation = []
+                 second_agent_kwargs = {
+                     "action_selection_method": observation_creation_method,
+                     "percentage": second_trace_percentage,
+                     "is_consecutive": second_is_consecutive,
+                     "save_fig": False,
+                 }
+                 while second_observation == []:
+                     if env_prop.use_goal_directed_problem() == True:
+                         second_agent_kwargs["goal_directed_problem"] = second_agent_goal
+                     else:
+                         second_agent_kwargs["goal_directed_goal"] = second_agent_goal
+                     second_observation = agents[0].agent.generate_partial_observation(
+                         **second_agent_kwargs
+                     )
+                 second_observation = agents[0].agent.simplify_observation(
+                     second_observation
+                 )
+             else:
+                 is_same_goal = (
+                     np.random.choice(
+                         [1, 0],
+                         1,
+                         p=[1 / max(len(agents), 6), 1 - 1 / max(len(agents), 6)],
+                     )
+                 )[0]
+                 first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                     0
+                 ]
+                 first_agent = np.random.choice(agents)
+                 first_trace_percentage = random.choice(
+                     [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                 )
+                 first_observation = first_agent.agent.generate_partial_observation(
+                     action_selection_method=observation_creation_method,
+                     percentage=first_trace_percentage,
+                     is_consecutive=first_is_consecutive,
+                     save_fig=False,
+                     random_optimalism=True,
+                 )
+                 first_observation = first_agent.agent.simplify_observation(
+                     first_observation
+                 )
+
+                 second_agent = first_agent
+                 if not is_same_goal:
+                     second_agent = np.random.choice(
+                         [agent for agent in agents if agent != first_agent]
+                     )
+                     assert second_agent != first_agent
+                 second_is_consecutive = np.random.choice(
+                     [True, False], 1, p=[0.5, 0.5]
+                 )[0]
+                 second_trace_percentage = first_trace_percentage
+                 second_observation = second_agent.agent.generate_partial_observation(
+                     action_selection_method=observation_creation_method,
+                     percentage=second_trace_percentage,
+                     is_consecutive=second_is_consecutive,
+                     save_fig=False,
+                     random_optimalism=True,
+                 )
+                 second_observation = second_agent.agent.simplify_observation(
+                     second_observation
+                 )
+             if is_same_goal:
+                 observations_distance = measure_average_sequence_distance(
+                     first_observation, second_observation
+                 )  # for debugging mate
+             all_samples.append(
+                 (
+                     [
+                         torch.tensor(observation, dtype=torch.float32)
+                         for observation in first_observation
+                     ],
+                     [
+                         torch.tensor(observation, dtype=torch.float32)
+                         for observation in second_observation
+                     ],
+                     torch.tensor(is_same_goal, dtype=torch.float32),
+                 )
+             )
+             # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
+             if i % 1000 == 0:
+                 print(f"generated {i} samples")
+
+         total_samples = len(all_samples)
+         train_size = int(0.8 * total_samples)
+         train_samples = all_samples[:train_size]
+         dev_samples = all_samples[train_size:]
+         with open(dataset_train_path, "wb") as train_file:
+             dill.dump(train_samples, train_file)
+         with open(dataset_dev_path, "wb") as dev_file:
+             dill.dump(dev_samples, dev_file)
+
+     return train_samples, dev_samples
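generate_datasets produces Siamese-style training pairs: two simplified observation sequences plus a float label that is 1.0 when both traces head toward the same goal, split 80/20 into train/dev pickles. A minimal sketch of that sample layout and how GRDataset wraps it, with hand-made tensors standing in for the real observations generated by the agents above:

import torch

from gr_libs.recognizer.graml._gr_dataset import GRDataset  # module added in this release

# Hand-made stand-ins for simplified observation sequences (illustrative shapes only).
samples = [
    ([torch.zeros(4), torch.ones(4)], [torch.ones(4), torch.zeros(4)], torch.tensor(1.0)),  # same goal
    ([torch.zeros(4)], [torch.full((4,), 2.0)], torch.tensor(0.0)),                         # different goals
]

dataset = GRDataset(num_samples=len(samples), samples=samples)
seq_a, seq_b, same_goal = dataset[0]  # __getitem__ returns the tuple appended in generate_datasets
print(len(dataset), same_goal)        # -> 2 tensor(1.)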