gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
- evaluation/create_minigrid_map_image.py +10 -6
- evaluation/file_system.py +16 -5
- evaluation/generate_experiments_results.py +123 -74
- evaluation/generate_experiments_results_new_ver1.py +227 -243
- evaluation/generate_experiments_results_new_ver2.py +317 -317
- evaluation/generate_task_specific_statistics_plots.py +481 -253
- evaluation/get_plans_images.py +41 -26
- evaluation/increasing_and_decreasing_.py +97 -56
- gr_libs/__init__.py +2 -1
- gr_libs/_version.py +2 -2
- gr_libs/environment/__init__.py +16 -8
- gr_libs/environment/environment.py +167 -39
- gr_libs/environment/utils/utils.py +22 -12
- gr_libs/metrics/__init__.py +5 -0
- gr_libs/metrics/metrics.py +76 -34
- gr_libs/ml/__init__.py +2 -0
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +1 -1
- gr_libs/ml/base/rl_agent.py +13 -10
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/deep_rl_learner.py +433 -352
- gr_libs/ml/neural/utils/__init__.py +1 -1
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/neural/utils/penv.py +5 -2
- gr_libs/ml/planner/mcts/mcts_model.py +524 -302
- gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
- gr_libs/ml/planner/mcts/utils/node.py +11 -7
- gr_libs/ml/planner/mcts/utils/tree.py +14 -10
- gr_libs/ml/sequential/__init__.py +1 -1
- gr_libs/ml/sequential/lstm_model.py +256 -175
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +123 -73
- gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
- gr_libs/ml/utils/__init__.py +8 -2
- gr_libs/ml/utils/format.py +78 -70
- gr_libs/ml/utils/math.py +2 -1
- gr_libs/ml/utils/other.py +1 -1
- gr_libs/ml/utils/storage.py +88 -28
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
- gr_libs/recognizer/graml/gr_dataset.py +209 -110
- gr_libs/recognizer/graml/graml_recognizer.py +431 -240
- gr_libs/recognizer/recognizer.py +38 -27
- gr_libs/recognizer/utils/__init__.py +1 -1
- gr_libs/recognizer/utils/format.py +8 -3
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
- gr_libs-0.1.8.dist-info/RECORD +70 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +8 -4
- tests/test_graql.py +2 -1
- tutorials/gcdraco_panda_tutorial.py +66 -0
- tutorials/gcdraco_parking_tutorial.py +61 -0
- tutorials/graml_minigrid_tutorial.py +42 -12
- tutorials/graml_panda_tutorial.py +35 -14
- tutorials/graml_parking_tutorial.py +37 -20
- tutorials/graml_point_maze_tutorial.py +33 -13
- tutorials/graql_minigrid_tutorial.py +31 -15
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -1,102 +1,167 @@
 from abc import abstractmethod
 import os
 import dill
-from typing import List, Type
+from typing import List, Type, Callable
 import numpy as np
 from gr_libs.environment.environment import EnvProperty, GCEnvProperty
 from gr_libs.environment.utils.utils import domain_to_env_property
-from gr_libs.metrics.metrics import kl_divergence_norm_softmax, mean_wasserstein_distance
+from gr_libs.metrics.metrics import (
+    kl_divergence_norm_softmax,
+    mean_wasserstein_distance,
+)
 from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
-from gr_libs.recognizer.recognizer import GaAdaptingRecognizer, GaAgentTrainerRecognizer, LearningRecognizer, Recognizer
+from gr_libs.recognizer.recognizer import (
+    GaAdaptingRecognizer,
+    GaAgentTrainerRecognizer,
+    LearningRecognizer,
+    Recognizer,
+)
+

 class GRAsRL(Recognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.agents = {}  # consider changing to ContextualAgent
+
+    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+        super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
+        dynamic_goals_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
+        self.active_goals = dynamic_goals
+        self.active_problems = dynamic_goals_problems
+        for problem_name, config in zip(dynamic_goals_problems, dynamic_train_configs):
+            agent_kwargs = {
+                "domain_name": self.env_prop.domain_name,
+                "problem_name": problem_name,
+                "env_prop": self.env_prop,
+            }
+            if config[0]:
+                agent_kwargs["algorithm"] = config[0]
+            if config[1]:
+                agent_kwargs["num_timesteps"] = config[1]
+            agent = self.rl_agent_type(**agent_kwargs)
+            agent.learn()
+            self.agents[problem_name] = agent
+        self.action_space = next(iter(self.agents.values())).env.action_space

+    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        scores = []
+        for problem_name in self.active_problems:
+            agent = self.choose_agent(problem_name)
+            if self.env_prop.gc_adaptable():
+                assert (
+                    self.__class__.__name__ == "GCDraco"
+                ), "This recognizer is not compatible with goal conditioned problems."
+                inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
+            score = self.evaluation_function(inf_sequence, agent, self.action_space)
+            scores.append(score)
+        # scores = metrics.softmin(np.array(scores))
+        if self.collect_statistics:
+            results_path = get_gr_as_rl_experiment_confidence_path(
+                domain_name=self.env_prop.domain_name,
+                env_name=self.env_prop.name,
+                recognizer=self.__class__.__name__,
+            )
+            if not os.path.exists(results_path):
+                os.makedirs(results_path)
+            with open(
+                results_path + f"/true_{true_goal}_{percentage}_scores.pkl", "wb"
+            ) as scores_file:
+                dill.dump(
+                    [
+                        (str(goal), score)
+                        for (goal, score) in zip(self.active_goals, scores)
+                    ],
+                    scores_file,
+                )
+        div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
+        return str(self.active_goals[true_goal_index])

-        for problem_name in self.active_problems:
-            agent = self.choose_agent(problem_name)
-            if self.env_prop.gc_adaptable():
-                assert self.__class__.__name__ == "GCDraco", "This recognizer is not compatible with goal conditioned problems."
-                inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
-            score = self.evaluation_function(inf_sequence, agent, self.action_space)
-            scores.append(score)
-        #scores = metrics.softmin(np.array(scores))
-        if self.collect_statistics:
-            results_path = get_gr_as_rl_experiment_confidence_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__)
-            if not os.path.exists(results_path): os.makedirs(results_path)
-            with open(results_path + f'/true_{true_goal}_{percentage}_scores.pkl', 'wb') as scores_file:
-                dill.dump([(str(goal), score) for (goal, score) in zip(self.active_goals, scores)], scores_file)
-        div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
-        return str(self.active_goals[true_goal_index])
-
-    def choose_agent(self, problem_name:str) -> RLAgent:
-        return self.agents[problem_name]
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        return self.agents[problem_name]


 class Graql(GRAsRL, GaAgentTrainerRecognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            not self.env_prop.gc_adaptable()
+            and self.env_prop.is_state_discrete()
+            and self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = TabularQLearner
+        self.evaluation_function = kl_divergence_norm_softmax
+

 class Draco(GRAsRL, GaAgentTrainerRecognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = DeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        assert (
+            self.evaluation_function is None
+            or type(self.evaluation_function) != Callable
+        )
+
+
+class GCDraco(
+    GRAsRL, LearningRecognizer, GaAdaptingRecognizer
+):  # TODO problem: it gets 2 goal_adaptation phase from parents, one with configs and one without.
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            self.env_prop.gc_adaptable()
+            and not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = GCDeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        assert (
+            self.evaluation_function is None
+            or type(self.evaluation_function) != Callable
+        )
+
+    def domain_learning_phase(self, base_goals: List[str], train_configs):
+        super().domain_learning_phase(base_goals, train_configs)
+        agent_kwargs = {
+            "domain_name": self.env_prop.domain_name,
+            "problem_name": self.env_prop.name,
+            "algorithm": self.original_train_configs[0][0],
+            "num_timesteps": self.original_train_configs[0][1],
+            "env_prop": self.env_prop,
+        }
+        agent = self.rl_agent_type(**agent_kwargs)
+        agent.learn()
+        self.agents[self.env_prop.name] = agent
+        self.action_space = agent.env.action_space

+    # this method currently does nothing but optimizations can be made here.
+    def goals_adaptation_phase(self, dynamic_goals):
+        self.active_goals = dynamic_goals
+        self.active_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]

-        agent_kwargs = {"domain_name": self.env_prop.domain_name,
-                        "problem_name": self.env_prop.name,
-                        "algorithm": self.original_train_configs[0][0],
-                        "num_timesteps": self.original_train_configs[0][1]}
-        agent = self.rl_agent_type(**agent_kwargs)
-        agent.learn()
-        self.agents[self.env_prop.name] = agent
-        self.action_space = agent.env.action_space
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        return next(iter(self.agents.values()))

-        if not self.env_prop.use_goal_directed_problem():
-            for obs in inf_sequence:
-                obs[0]['desired_goal'] = np.array([self.env_prop.str_to_goal(problem_name)], dtype=obs[0]['desired_goal'].dtype)
-            return inf_sequence
-        return inf_sequence
+    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+        if not self.env_prop.use_goal_directed_problem():
+            for obs in inf_sequence:
+                obs[0]["desired_goal"] = np.array(
+                    [self.env_prop.str_to_goal(problem_name)],
+                    dtype=obs[0]["desired_goal"].dtype,
+                )
+            return inf_sequence
+        return inf_sequence
gr_libs/recognizer/graml/gr_dataset.py
@@ -12,123 +12,222 @@ import os
 import dill
 import torch

+
 class GRDataset(Dataset):
+    def __init__(self, num_samples, samples):
+        self.num_samples = num_samples
+        self.samples = samples
+
+    def __len__(self):
+        return self.num_samples

+    def __getitem__(self, idx):
+        return self.samples[
+            idx
+        ]  # returns a tuple - as appended in 'generate_dataset' last line

-    def __getitem__(self, idx):
-        return self.samples[idx] # returns a tuple - as appended in 'generate_dataset' last line

 def check_diff_goals(first_agent_goal, second_agent_goal):
+    try:
+        assert first_agent_goal != second_agent_goal
+    except Exception as e:
+        try:
+            assert any(first_agent_goal != second_agent_goal)
+        except Exception as e:
+            for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
+                assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))
+

-def generate_datasets(
+def generate_datasets(
+    num_samples,
+    agents: List[ContextualAgent],
+    observation_creation_method: MethodType,
+    problems: List[str],
+    env_prop: EnvProperty,
+    recognizer_name: str,
+    gc_goal_set=None,
+):
+    if gc_goal_set:
+        model_name = env_prop.name
+    else:
+        model_name = env_prop.problem_list_to_str_tuple(problems)
+    dataset_directory = get_siamese_dataset_path(
+        domain_name=env_prop.domain_name,
+        env_name=env_prop.name,
+        model_name=model_name,
+        recognizer=recognizer_name,
+    )
+    dataset_train_path, dataset_dev_path = os.path.join(
+        dataset_directory, "train.pkl"
+    ), os.path.join(dataset_directory, "dev.pkl")
+    if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
+        print(f"Loading pre-existing datasets in {dataset_directory}")
+        with open(dataset_train_path, "rb") as train_file:
+            train_samples = dill.load(train_file)
+        with open(dataset_dev_path, "rb") as dev_file:
+            dev_samples = dill.load(dev_file)
+    else:
+        print(f"{dataset_directory} doesn't exist, generating datasets")
+        if not os.path.exists(dataset_directory):
+            os.makedirs(dataset_directory)
+        all_samples = []
+        for i in range(num_samples):
+            if (
+                gc_goal_set != None
+            ):  # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
+                assert (
+                    env_prop.gc_adaptable() == True
+                ), "shouldn't specify a goal directed representation if not generating datasets with a general agent."
+                is_same_goal = (
+                    np.random.choice(
+                        [1, 0],
+                        1,
+                        p=[
+                            1 / max(len(gc_goal_set), 6),
+                            1 - 1 / max(len(gc_goal_set), 6),
+                        ],
+                    )
+                )[0]
+                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                    0
+                ]
+                first_random_index = np.random.randint(
+                    0, len(gc_goal_set)
+                )  # works for lists of every object type, while np.choice only works for 1d arrays
+                first_agent_goal = gc_goal_set[
+                    first_random_index
+                ]  # could be either a real goal or a goal-directed problem name
+                # first_agent_goal = np.random.choice(gc_goal_set)
+                first_trace_percentage = random.choice(
+                    [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                )
+                first_observation = []
+                first_agent_kwargs = {
+                    "action_selection_method": observation_creation_method,
+                    "percentage": first_trace_percentage,
+                    "is_consecutive": first_is_consecutive,
+                    "save_fig": False,
+                }
+                while first_observation == []:
+                    # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
+                    # but the problem is with the panda because it
+                    if env_prop.use_goal_directed_problem():
+                        first_agent_kwargs["goal_directed_problem"] = first_agent_goal
+                    else:
+                        first_agent_kwargs["goal_directed_goal"] = first_agent_goal
+                    first_observation = agents[0].agent.generate_partial_observation(
+                        **first_agent_kwargs
+                    )
+                first_observation = agents[0].agent.simplify_observation(
+                    first_observation
+                )

+                second_is_consecutive = np.random.choice(
+                    [True, False], 1, p=[0.5, 0.5]
+                )[0]
+                second_agent_goal = first_agent_goal
+                second_random_index = first_random_index
+                if not is_same_goal:
+                    second_random_index = np.random.choice(
+                        [i for i in range(len(gc_goal_set)) if i != first_random_index]
+                    )
+                    assert first_random_index != second_random_index
+                second_agent_goal = gc_goal_set[second_random_index]
+                if not is_same_goal:
+                    check_diff_goals(first_agent_goal, second_agent_goal)
+                second_trace_percentage = first_trace_percentage
+                second_observation = []
+                second_agent_kwargs = {
+                    "action_selection_method": observation_creation_method,
+                    "percentage": second_trace_percentage,
+                    "is_consecutive": second_is_consecutive,
+                    "save_fig": False,
+                }
+                while second_observation == []:
+                    if env_prop.use_goal_directed_problem() == True:
+                        second_agent_kwargs["goal_directed_problem"] = second_agent_goal
+                    else:
+                        second_agent_kwargs["goal_directed_goal"] = second_agent_goal
+                    second_observation = agents[0].agent.generate_partial_observation(
+                        **second_agent_kwargs
+                    )
+                second_observation = agents[0].agent.simplify_observation(
+                    second_observation
+                )
+            else:
+                is_same_goal = (
+                    np.random.choice(
+                        [1, 0],
+                        1,
+                        p=[1 / max(len(agents), 6), 1 - 1 / max(len(agents), 6)],
+                    )
+                )[0]
+                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                    0
+                ]
+                first_agent = np.random.choice(agents)
+                first_trace_percentage = random.choice(
+                    [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                )
+                first_observation = first_agent.agent.generate_partial_observation(
+                    action_selection_method=observation_creation_method,
+                    percentage=first_trace_percentage,
+                    is_consecutive=first_is_consecutive,
+                    save_fig=False,
+                    random_optimalism=True,
+                )
+                first_observation = first_agent.agent.simplify_observation(
+                    first_observation
+                )

+                second_agent = first_agent
+                if not is_same_goal:
+                    second_agent = np.random.choice(
+                        [agent for agent in agents if agent != first_agent]
+                    )
+                    assert second_agent != first_agent
+                second_is_consecutive = np.random.choice(
+                    [True, False], 1, p=[0.5, 0.5]
+                )[0]
+                second_trace_percentage = first_trace_percentage
+                second_observation = second_agent.agent.generate_partial_observation(
+                    action_selection_method=observation_creation_method,
+                    percentage=second_trace_percentage,
+                    is_consecutive=second_is_consecutive,
+                    save_fig=False,
+                    random_optimalism=True,
+                )
+                second_observation = second_agent.agent.simplify_observation(
+                    second_observation
+                )
+            if is_same_goal:
+                observations_distance = measure_average_sequence_distance(
+                    first_observation, second_observation
+                )  # for debugging mate
+            all_samples.append(
+                (
+                    [
+                        torch.tensor(observation, dtype=torch.float32)
+                        for observation in first_observation
+                    ],
+                    [
+                        torch.tensor(observation, dtype=torch.float32)
+                        for observation in second_observation
+                    ],
+                    torch.tensor(is_same_goal, dtype=torch.float32),
+                )
+            )
+            # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
+            if i % 1000 == 0:
+                print(f"generated {i} samples")

+        total_samples = len(all_samples)
+        train_size = int(0.8 * total_samples)
+        train_samples = all_samples[:train_size]
+        dev_samples = all_samples[train_size:]
+        with open(dataset_train_path, "wb") as train_file:
+            dill.dump(train_samples, train_file)
+        with open(dataset_dev_path, "wb") as dev_file:
+            dill.dump(dev_samples, dev_file)

+    return train_samples, dev_samples