gr-libs 0.1.7.post0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- gr_libs/__init__.py +4 -1
- gr_libs/_evaluation/__init__.py +1 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +260 -0
- gr_libs/_evaluation/_generate_experiments_results.py +141 -0
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +497 -0
- gr_libs/_evaluation/_get_plans_images.py +61 -0
- gr_libs/_evaluation/_increasing_and_decreasing_.py +106 -0
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +294 -0
- gr_libs/environment/__init__.py +30 -9
- gr_libs/environment/_utils/utils.py +27 -0
- gr_libs/environment/environment.py +417 -54
- gr_libs/metrics/__init__.py +7 -0
- gr_libs/metrics/metrics.py +231 -54
- gr_libs/ml/__init__.py +2 -5
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +3 -1
- gr_libs/ml/base/rl_agent.py +81 -13
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/__init__.py +1 -3
- gr_libs/ml/neural/deep_rl_learner.py +619 -378
- gr_libs/ml/neural/utils/__init__.py +1 -2
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +1 -1
- gr_libs/ml/planner/mcts/{utils → _utils}/node.py +11 -7
- gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +15 -11
- gr_libs/ml/planner/mcts/mcts_model.py +571 -312
- gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs/ml/sequential/_lstm_model.py +270 -0
- gr_libs/ml/tabular/__init__.py +1 -3
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +150 -82
- gr_libs/ml/tabular/tabular_rl_agent.py +42 -28
- gr_libs/ml/utils/__init__.py +2 -3
- gr_libs/ml/utils/format.py +28 -97
- gr_libs/ml/utils/math.py +5 -3
- gr_libs/ml/utils/other.py +3 -3
- gr_libs/ml/utils/storage.py +88 -81
- gr_libs/odgr_executor.py +268 -0
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/_utils/__init__.py +0 -0
- gr_libs/recognizer/_utils/format.py +18 -0
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +233 -88
- gr_libs/recognizer/graml/_gr_dataset.py +233 -0
- gr_libs/recognizer/graml/graml_recognizer.py +586 -252
- gr_libs/recognizer/recognizer.py +90 -30
- gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- gr_libs/tutorials/gcdraco_panda_tutorial.py +62 -0
- gr_libs/tutorials/gcdraco_parking_tutorial.py +57 -0
- gr_libs/tutorials/graml_minigrid_tutorial.py +64 -0
- gr_libs/tutorials/graml_panda_tutorial.py +57 -0
- gr_libs/tutorials/graml_parking_tutorial.py +52 -0
- gr_libs/tutorials/graml_point_maze_tutorial.py +60 -0
- gr_libs/tutorials/graql_minigrid_tutorial.py +50 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
- gr_libs-0.2.2.dist-info/RECORD +71 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
- gr_libs-0.2.2.dist-info/top_level.txt +2 -0
- tests/test_draco.py +14 -0
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +12 -8
- tests/test_graql.py +3 -2
- evaluation/analyze_results_cross_alg_cross_domain.py +0 -277
- evaluation/create_minigrid_map_image.py +0 -34
- evaluation/file_system.py +0 -42
- evaluation/generate_experiments_results.py +0 -92
- evaluation/generate_experiments_results_new_ver1.py +0 -254
- evaluation/generate_experiments_results_new_ver2.py +0 -331
- evaluation/generate_task_specific_statistics_plots.py +0 -272
- evaluation/get_plans_images.py +0 -47
- evaluation/increasing_and_decreasing_.py +0 -63
- gr_libs/environment/utils/utils.py +0 -17
- gr_libs/ml/neural/utils/penv.py +0 -57
- gr_libs/ml/sequential/lstm_model.py +0 -192
- gr_libs/recognizer/graml/gr_dataset.py +0 -134
- gr_libs/recognizer/utils/__init__.py +0 -1
- gr_libs/recognizer/utils/format.py +0 -13
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- gr_libs-0.1.7.post0.dist-info/top_level.txt +0 -4
- tutorials/graml_minigrid_tutorial.py +0 -34
- tutorials/graml_panda_tutorial.py +0 -41
- tutorials/graml_parking_tutorial.py +0 -39
- tutorials/graml_point_maze_tutorial.py +0 -39
- tutorials/graql_minigrid_tutorial.py +0 -34
- /gr_libs/environment/{utils → _utils}/__init__.py +0 -0 (renamed, file contents unchanged)
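The listing shows the recognizer classes consolidating under `gr_libs.recognizer`, while evaluation, tutorial, and utility modules move inside the package (mostly under underscore-prefixed names). A minimal import sketch, assuming gr-libs 0.2.2 is installed; the paths are copied from the hunks shown below:

```python
# Import paths as they appear in the 0.2.2 diff hunks below (assumes gr-libs 0.2.2 is installed).
from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
from gr_libs.recognizer.graml.graml_recognizer import (
    ExpertBasedGraml,
    GCGraml,
    MCTSBasedGraml,
)

# Print the recognizer classes shipped in this release and where they live.
for cls in (Draco, GCDraco, Graql, ExpertBasedGraml, GCGraml, MCTSBasedGraml):
    print(cls.__module__, cls.__name__)
```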
gr_libs/recognizer/_utils/format.py (new file)

```diff
@@ -0,0 +1,18 @@
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
+from gr_libs.recognizer.graml.graml_recognizer import (
+    ExpertBasedGraml,
+    GCGraml,
+    MCTSBasedGraml,
+)
+
+
+def recognizer_str_to_obj(recognizer_str: str):
+    recognizer_map = {
+        "GCGraml": GCGraml,
+        "ExpertBasedGraml": ExpertBasedGraml,
+        "MCTSBasedGraml": MCTSBasedGraml,
+        "Graql": Graql,
+        "Draco": Draco,
+        "GCDraco": GCDraco,
+    }
+    return recognizer_map.get(recognizer_str)
```
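A short usage sketch for the helper above, assuming gr-libs 0.2.2 is installed; the module path follows the file listing (`gr_libs/recognizer/_utils/format.py`):

```python
from gr_libs.recognizer._utils.format import recognizer_str_to_obj

# Maps a recognizer name to its class; unknown names fall back to None via dict.get.
print(recognizer_str_to_obj("GCDraco"))    # <class '...GCDraco'>
print(recognizer_str_to_obj("NoSuchAlg"))  # None
```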
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py

```diff
@@ -1,102 +1,247 @@
-from abc import abstractmethod
 import os
+
 import dill
-from typing import List, Type
 import numpy as np
-
-from gr_libs.
-from gr_libs.metrics.metrics import kl_divergence_norm_softmax, mean_wasserstein_distance
+
+from gr_libs.metrics.metrics import kl_divergence_norm_softmax
 from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
-from gr_libs.recognizer.recognizer import
+from gr_libs.recognizer.recognizer import (
+    GaAdaptingRecognizer,
+    GaAgentTrainerRecognizer,
+    LearningRecognizer,
+    Recognizer,
+)
+

 class GRAsRL(Recognizer):
-[old lines 16-54 removed; their content is not captured in this rendering]
+    """
+    GRAsRL class represents a goal recognition framework that using reinforcement learning.
+    It inherits from the Recognizer class and implements the goal recognition process, including the
+    Goal adaptation and the inference phase. It trains agents for each new goal, which makes it impractical
+    for realtime environments where goals mmight change.
+
+    Attributes:
+        agents (dict): A dictionary that maps problem names to RLAgent instances.
+        active_goals (List[str]): A list of active goals.
+        active_problems (List[str]): A list of active problem names.
+        action_space (gym.Space): The action space of the RLAgent.
+
+    Methods:
+        goals_adaptation_phase: Performs the goals adaptation phase.
+        prepare_inf_sequence: Prepares the inference sequence for goal-directed problems.
+        inference_phase: Performs the inference phase and returns the recognized goal.
+        choose_agent: Returns the RLAgent for a given problem name.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.agents = {}  # consider changing to ContextualAgent
+
+    def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
+        """
+        Performs the goals adaptation phase.
+
+        Args:
+            dynamic_goals (List[str]): A list of dynamic goals.
+            dynamic_train_configs: The dynamic training configurations.
+
+        Returns:
+            None
+        """
+        super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
+        dynamic_goals_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
+        self.active_goals = dynamic_goals
+        self.active_problems = dynamic_goals_problems
+        for problem_name, config in zip(dynamic_goals_problems, dynamic_train_configs):
+            agent_kwargs = {
+                "domain_name": self.env_prop.domain_name,
+                "problem_name": problem_name,
+                "env_prop": self.env_prop,
+            }
+            if config[0]:
+                agent_kwargs["algorithm"] = config[0]
+            if config[1]:
+                agent_kwargs["num_timesteps"] = config[1]
+            agent = self.rl_agent_type(**agent_kwargs)
+            agent.learn()
+            self.agents[problem_name] = agent
+        self.action_space = next(iter(self.agents.values())).env.action_space
+
+    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+        """
+        Prepares the inference sequence for goal-directed problems.
+
+        Args:
+            problem_name (str): The name of the problem.
+            inf_sequence: The inference sequence.
+
+        Returns:
+            The prepared inference sequence.
+        """
+        if not self.env_prop.use_goal_directed_problem():
+            for obs in inf_sequence:
+                obs[0]["desired_goal"] = np.array(
+                    [self.env_prop.str_to_goal(problem_name)],
+                    dtype=obs[0]["desired_goal"].dtype,
+                )
+            return inf_sequence
+        return inf_sequence
+
+    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        """
+        Performs the inference phase and returns the recognized goal.
+
+        Args:
+            inf_sequence: The inference sequence.
+            true_goal: The true goal.
+            percentage: The percentage.
+
+        Returns:
+            The recognized goal as a string.
+        """
+        scores = []
+        for problem_name in self.active_problems:
+            agent = self.choose_agent(problem_name)
+            if self.env_prop.gc_adaptable():
+                inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
+            score = self.evaluation_function(inf_sequence, agent, self.action_space)
+            scores.append(score)
+
+        if self.collect_statistics:
+            results_path = get_gr_as_rl_experiment_confidence_path(
+                domain_name=self.env_prop.domain_name,
+                env_name=self.env_prop.name,
+                recognizer=self.__class__.__name__,
+            )
+            if not os.path.exists(results_path):
+                os.makedirs(results_path)
+            with open(
+                results_path + f"/true_{true_goal}_{percentage}_scores.pkl", "wb"
+            ) as scores_file:
+                dill.dump(
+                    [
+                        (str(goal), score)
+                        for (goal, score) in zip(self.active_goals, scores)
+                    ],
+                    scores_file,
+                )
+        div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
+        return str(self.active_goals[true_goal_index])
+
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        """
+        Returns the RLAgent for a given problem name.
+
+        Args:
+            problem_name (str): The name of the problem.
+
+        Returns:
+            The RLAgent instance.
+        """
+        return self.agents[problem_name]


 class Graql(GRAsRL, GaAgentTrainerRecognizer):
-[old lines 58-62 removed; their content is not captured in this rendering]
+    """
+    Graql extends the GRAsRL framework and GaAgentTrainerRecognizer, since it trains new agents for every new goal and it adheres
+    to the goal recognition as reinforcement learning framework. It uses a tabular Q-learning agent for discrete state and action spaces.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            not self.env_prop.gc_adaptable()
+            and self.env_prop.is_state_discrete()
+            and self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type is None:
+            self.rl_agent_type = TabularQLearner
+        self.evaluation_function = kl_divergence_norm_softmax
+

 class Draco(GRAsRL, GaAgentTrainerRecognizer):
-[old lines 65-102 removed; their content is not captured in this rendering]
+    """
+    Draco class represents a recognizer agent trained using the GRAsRL framework.
+    Like Graql, it trains new agents for every new goal and adheres to the goal recognition as reinforcement learning framework.
+    It uses a deep reinforcement learning agent for continuous state and action spaces.
+
+    Args:
+        *args: Variable length argument list.
+        **kwargs: Arbitrary keyword arguments.
+
+    Attributes:
+        rl_agent_type (type): Type of the reinforcement learning agent.
+        evaluation_function (callable): Function used for evaluation.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Add any additional initialization code here
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = DeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
+
+
+class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
+    """
+    GCDraco recognizer uses goal-conditioned reinforcement learning using the Draco algorithm.
+    It inherits from GRAsRL, LearningRecognizer, and GaAdaptingRecognizer.
+    It is designed for environments with continuous state and action spaces.
+    It uses a goal-conditioned deep reinforcement learning agent for training and inference, which
+    enables it to adapt to new goals during the goal adaptation phase without requiring retraining,
+    making it suitable for dynamic environments.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            self.env_prop.gc_adaptable()
+            and not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = GCDeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
+
+    def domain_learning_phase(self, base_goals: list[str], train_configs):
+        super().domain_learning_phase(base_goals, train_configs)
+        agent_kwargs = {
+            "domain_name": self.env_prop.domain_name,
+            "problem_name": self.env_prop.name,
+            "algorithm": self.original_train_configs[0][0],
+            "num_timesteps": self.original_train_configs[0][1],
+            "env_prop": self.env_prop,
+        }
+        agent = self.rl_agent_type(**agent_kwargs)
+        agent.learn()
+        self.agents[self.env_prop.name] = agent
+        self.action_space = agent.env.action_space
+
+    # this method currently does nothing but optimizations can be made here.
+    def goals_adaptation_phase(self, dynamic_goals):
+        self.active_goals = dynamic_goals
+        self.active_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
+
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        return next(iter(self.agents.values()))
```
gr_libs/recognizer/graml/_gr_dataset.py (new file)

```diff
@@ -0,0 +1,233 @@
+import os
+import random
+from types import MethodType
+
+import dill
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+from gr_libs.environment.environment import EnvProperty
+from gr_libs.metrics.metrics import measure_average_sequence_distance
+from gr_libs.ml.base.rl_agent import ContextualAgent
+from gr_libs.ml.utils import get_siamese_dataset_path
+
+
+class GRDataset(Dataset):
+    def __init__(self, num_samples, samples):
+        self.num_samples = num_samples
+        self.samples = samples
+
+    def __len__(self):
+        return self.num_samples
+
+    def __getitem__(self, idx):
+        return self.samples[
+            idx
+        ]  # returns a tuple - as appended in 'generate_dataset' last line
+
+
+def check_diff_goals(first_agent_goal, second_agent_goal):
+    try:
+        assert first_agent_goal != second_agent_goal
+    except Exception:
+        try:
+            assert any(first_agent_goal != second_agent_goal)
+        except Exception:
+            for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
+                assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))
+
+
+def generate_datasets(
+    num_samples,
+    agents: list[ContextualAgent],
+    observation_creation_method: MethodType,
+    problems: list[str],
+    env_prop: EnvProperty,
+    recognizer_name: str,
+    gc_goal_set=None,
+):
+    if gc_goal_set:
+        model_name = env_prop.name
+    else:
+        model_name = env_prop.problem_list_to_str_tuple(problems)
+    dataset_directory = get_siamese_dataset_path(
+        domain_name=env_prop.domain_name,
+        env_name=env_prop.name,
+        model_name=model_name,
+        recognizer=recognizer_name,
+    )
+    dataset_train_path, dataset_dev_path = os.path.join(
+        dataset_directory, "train.pkl"
+    ), os.path.join(dataset_directory, "dev.pkl")
+    if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
+        print(f"Loading pre-existing datasets in {dataset_directory}")
+        with open(dataset_train_path, "rb") as train_file:
+            train_samples = dill.load(train_file)
+        with open(dataset_dev_path, "rb") as dev_file:
+            dev_samples = dill.load(dev_file)
+    else:
+        print(f"{dataset_directory} doesn't exist, generating datasets")
+        if not os.path.exists(dataset_directory):
+            os.makedirs(dataset_directory)
+        all_samples = []
+        for i in range(num_samples):
+            if (
+                gc_goal_set != None
+            ):  # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
+                assert (
+                    env_prop.gc_adaptable() == True
+                ), "shouldn't specify a goal directed representation if not generating datasets with a general agent."
+                is_same_goal = (
+                    np.random.choice(
+                        [1, 0],
+                        1,
+                        p=[
+                            1 / max(len(gc_goal_set), 6),
+                            1 - 1 / max(len(gc_goal_set), 6),
+                        ],
+                    )
+                )[0]
+                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                    0
+                ]
+                first_random_index = np.random.randint(
+                    0, len(gc_goal_set)
+                )  # works for lists of every object type, while np.choice only works for 1d arrays
+                first_agent_goal = gc_goal_set[
+                    first_random_index
+                ]  # could be either a real goal or a goal-directed problem name
+                # first_agent_goal = np.random.choice(gc_goal_set)
+                first_trace_percentage = random.choice(
+                    [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                )
+                first_observation = []
+                first_agent_kwargs = {
+                    "action_selection_method": observation_creation_method,
+                    "percentage": first_trace_percentage,
+                    "is_consecutive": first_is_consecutive,
+                    "save_fig": False,
+                }
+                while first_observation == []:
+                    # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
+                    # but the problem is with the panda because it
+                    if env_prop.use_goal_directed_problem():
+                        first_agent_kwargs["goal_directed_problem"] = first_agent_goal
+                    else:
+                        first_agent_kwargs["goal_directed_goal"] = first_agent_goal
+                    first_observation = agents[0].agent.generate_partial_observation(
+                        **first_agent_kwargs
+                    )
+                first_observation = agents[0].agent.simplify_observation(
+                    first_observation
+                )
+
+                second_is_consecutive = np.random.choice(
+                    [True, False], 1, p=[0.5, 0.5]
+                )[0]
+                second_agent_goal = first_agent_goal
+                second_random_index = first_random_index
+                if not is_same_goal:
+                    second_random_index = np.random.choice(
+                        [i for i in range(len(gc_goal_set)) if i != first_random_index]
+                    )
+                    assert first_random_index != second_random_index
+                second_agent_goal = gc_goal_set[second_random_index]
+                if not is_same_goal:
+                    check_diff_goals(first_agent_goal, second_agent_goal)
+                second_trace_percentage = first_trace_percentage
+                second_observation = []
+                second_agent_kwargs = {
+                    "action_selection_method": observation_creation_method,
+                    "percentage": second_trace_percentage,
+                    "is_consecutive": second_is_consecutive,
+                    "save_fig": False,
+                }
+                while second_observation == []:
+                    if env_prop.use_goal_directed_problem() == True:
+                        second_agent_kwargs["goal_directed_problem"] = second_agent_goal
+                    else:
+                        second_agent_kwargs["goal_directed_goal"] = second_agent_goal
+                    second_observation = agents[0].agent.generate_partial_observation(
+                        **second_agent_kwargs
+                    )
+                second_observation = agents[0].agent.simplify_observation(
+                    second_observation
+                )
+            else:
+                is_same_goal = (
+                    np.random.choice(
+                        [1, 0],
+                        1,
+                        p=[1 / max(len(agents), 6), 1 - 1 / max(len(agents), 6)],
+                    )
+                )[0]
+                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                    0
+                ]
+                first_agent = np.random.choice(agents)
+                first_trace_percentage = random.choice(
+                    [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                )
+                first_observation = first_agent.agent.generate_partial_observation(
+                    action_selection_method=observation_creation_method,
+                    percentage=first_trace_percentage,
+                    is_consecutive=first_is_consecutive,
+                    save_fig=False,
+                    random_optimalism=True,
+                )
+                first_observation = first_agent.agent.simplify_observation(
+                    first_observation
+                )
+
+                second_agent = first_agent
+                if not is_same_goal:
+                    second_agent = np.random.choice(
+                        [agent for agent in agents if agent != first_agent]
+                    )
+                    assert second_agent != first_agent
+                second_is_consecutive = np.random.choice(
+                    [True, False], 1, p=[0.5, 0.5]
+                )[0]
+                second_trace_percentage = first_trace_percentage
+                second_observation = second_agent.agent.generate_partial_observation(
+                    action_selection_method=observation_creation_method,
+                    percentage=second_trace_percentage,
+                    is_consecutive=second_is_consecutive,
+                    save_fig=False,
+                    random_optimalism=True,
+                )
+                second_observation = second_agent.agent.simplify_observation(
+                    second_observation
+                )
+            if is_same_goal:
+                observations_distance = measure_average_sequence_distance(
+                    first_observation, second_observation
+                )  # for debugging mate
+            all_samples.append(
+                (
+                    [
+                        torch.tensor(observation, dtype=torch.float32)
+                        for observation in first_observation
+                    ],
+                    [
+                        torch.tensor(observation, dtype=torch.float32)
+                        for observation in second_observation
+                    ],
+                    torch.tensor(is_same_goal, dtype=torch.float32),
+                )
+            )
+            # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
+            if i % 1000 == 0:
+                print(f"generated {i} samples")
+
+        total_samples = len(all_samples)
+        train_size = int(0.8 * total_samples)
+        train_samples = all_samples[:train_size]
+        dev_samples = all_samples[train_size:]
+        with open(dataset_train_path, "wb") as train_file:
+            dill.dump(train_samples, train_file)
+        with open(dataset_dev_path, "wb") as dev_file:
+            dill.dump(dev_samples, dev_file)
+
+    return train_samples, dev_samples
```
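`generate_datasets` above returns lists of `(sequence_a, sequence_b, same_goal_label)` tuples that `GRDataset` serves by index. A minimal sketch of consuming samples of that shape with a PyTorch DataLoader, using toy tensors and a local stand-in for `GRDataset` (only torch is assumed):

```python
import torch
from torch.utils.data import DataLoader, Dataset


class PairDataset(Dataset):
    """Local stand-in mirroring GRDataset: each item is (seq_a, seq_b, label)."""

    def __init__(self, samples):
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]


# Two toy samples shaped like the tuples built in generate_datasets:
# each side is a list of per-step feature tensors, label 1.0 means "same goal".
samples = [
    ([torch.zeros(4), torch.zeros(4)], [torch.ones(4)], torch.tensor(1.0)),
    ([torch.ones(4)], [torch.zeros(4), torch.ones(4)], torch.tensor(0.0)),
]

# Sequence lengths differ, so keep batch_size=1 and return the raw tuple unchanged.
loader = DataLoader(PairDataset(samples), batch_size=1, collate_fn=lambda batch: batch[0])
for seq_a, seq_b, label in loader:
    print(len(seq_a), len(seq_b), label.item())
```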