gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
  2. evaluation/create_minigrid_map_image.py +10 -6
  3. evaluation/file_system.py +16 -5
  4. evaluation/generate_experiments_results.py +123 -74
  5. evaluation/generate_experiments_results_new_ver1.py +227 -243
  6. evaluation/generate_experiments_results_new_ver2.py +317 -317
  7. evaluation/generate_task_specific_statistics_plots.py +481 -253
  8. evaluation/get_plans_images.py +41 -26
  9. evaluation/increasing_and_decreasing_.py +97 -56
  10. gr_libs/__init__.py +2 -1
  11. gr_libs/_version.py +2 -2
  12. gr_libs/environment/__init__.py +16 -8
  13. gr_libs/environment/environment.py +167 -39
  14. gr_libs/environment/utils/utils.py +22 -12
  15. gr_libs/metrics/__init__.py +5 -0
  16. gr_libs/metrics/metrics.py +76 -34
  17. gr_libs/ml/__init__.py +2 -0
  18. gr_libs/ml/agent.py +21 -6
  19. gr_libs/ml/base/__init__.py +1 -1
  20. gr_libs/ml/base/rl_agent.py +13 -10
  21. gr_libs/ml/consts.py +1 -1
  22. gr_libs/ml/neural/deep_rl_learner.py +433 -352
  23. gr_libs/ml/neural/utils/__init__.py +1 -1
  24. gr_libs/ml/neural/utils/dictlist.py +3 -3
  25. gr_libs/ml/neural/utils/penv.py +5 -2
  26. gr_libs/ml/planner/mcts/mcts_model.py +524 -302
  27. gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
  28. gr_libs/ml/planner/mcts/utils/node.py +11 -7
  29. gr_libs/ml/planner/mcts/utils/tree.py +14 -10
  30. gr_libs/ml/sequential/__init__.py +1 -1
  31. gr_libs/ml/sequential/lstm_model.py +256 -175
  32. gr_libs/ml/tabular/state.py +7 -7
  33. gr_libs/ml/tabular/tabular_q_learner.py +123 -73
  34. gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
  35. gr_libs/ml/utils/__init__.py +8 -2
  36. gr_libs/ml/utils/format.py +78 -70
  37. gr_libs/ml/utils/math.py +2 -1
  38. gr_libs/ml/utils/other.py +1 -1
  39. gr_libs/ml/utils/storage.py +88 -28
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
  42. gr_libs/recognizer/graml/gr_dataset.py +209 -110
  43. gr_libs/recognizer/graml/graml_recognizer.py +431 -240
  44. gr_libs/recognizer/recognizer.py +38 -27
  45. gr_libs/recognizer/utils/__init__.py +1 -1
  46. gr_libs/recognizer/utils/format.py +8 -3
  47. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
  48. gr_libs-0.1.8.dist-info/RECORD +70 -0
  49. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
  50. tests/test_gcdraco.py +10 -0
  51. tests/test_graml.py +8 -4
  52. tests/test_graql.py +2 -1
  53. tutorials/gcdraco_panda_tutorial.py +66 -0
  54. tutorials/gcdraco_parking_tutorial.py +61 -0
  55. tutorials/graml_minigrid_tutorial.py +42 -12
  56. tutorials/graml_panda_tutorial.py +35 -14
  57. tutorials/graml_parking_tutorial.py +37 -20
  58. tutorials/graml_point_maze_tutorial.py +33 -13
  59. tutorials/graql_minigrid_tutorial.py +31 -15
  60. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  61. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -1,102 +1,167 @@
 from abc import abstractmethod
 import os
 import dill
-from typing import List, Type
+from typing import List, Type, Callable
 import numpy as np
 from gr_libs.environment.environment import EnvProperty, GCEnvProperty
 from gr_libs.environment.utils.utils import domain_to_env_property
-from gr_libs.metrics.metrics import kl_divergence_norm_softmax, mean_wasserstein_distance
+from gr_libs.metrics.metrics import (
+    kl_divergence_norm_softmax,
+    mean_wasserstein_distance,
+)
 from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
-from gr_libs.recognizer.recognizer import GaAdaptingRecognizer, GaAgentTrainerRecognizer, LearningRecognizer, Recognizer
+from gr_libs.recognizer.recognizer import (
+    GaAdaptingRecognizer,
+    GaAgentTrainerRecognizer,
+    LearningRecognizer,
+    Recognizer,
+)
+
 
 class GRAsRL(Recognizer):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.agents = {} # consider changing to ContextualAgent
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.agents = {}  # consider changing to ContextualAgent
+
+    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+        super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
+        dynamic_goals_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
+        self.active_goals = dynamic_goals
+        self.active_problems = dynamic_goals_problems
+        for problem_name, config in zip(dynamic_goals_problems, dynamic_train_configs):
+            agent_kwargs = {
+                "domain_name": self.env_prop.domain_name,
+                "problem_name": problem_name,
+                "env_prop": self.env_prop,
+            }
+            if config[0]:
+                agent_kwargs["algorithm"] = config[0]
+            if config[1]:
+                agent_kwargs["num_timesteps"] = config[1]
+            agent = self.rl_agent_type(**agent_kwargs)
+            agent.learn()
+            self.agents[problem_name] = agent
+        self.action_space = next(iter(self.agents.values())).env.action_space
 
-    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
-        super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
-        dynamic_goals_problems = [self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals]
-        self.active_goals = dynamic_goals
-        self.active_problems = dynamic_goals_problems
-        for problem_name, config in zip(dynamic_goals_problems, dynamic_train_configs):
-            agent_kwargs = {"domain_name": self.env_prop.domain_name,
-                            "problem_name": problem_name}
-            if config[0]: agent_kwargs["algorithm"] = config[0]
-            if config[1]: agent_kwargs["num_timesteps"] = config[1]
-            agent = self.rl_agent_type(**agent_kwargs)
-            agent.learn()
-            self.agents[problem_name] = agent
-        self.action_space = next(iter(self.agents.values())).env.action_space
+    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        scores = []
+        for problem_name in self.active_problems:
+            agent = self.choose_agent(problem_name)
+            if self.env_prop.gc_adaptable():
+                assert (
+                    self.__class__.__name__ == "GCDraco"
+                ), "This recognizer is not compatible with goal conditioned problems."
+                inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
+            score = self.evaluation_function(inf_sequence, agent, self.action_space)
+            scores.append(score)
+        # scores = metrics.softmin(np.array(scores))
+        if self.collect_statistics:
+            results_path = get_gr_as_rl_experiment_confidence_path(
+                domain_name=self.env_prop.domain_name,
+                env_name=self.env_prop.name,
+                recognizer=self.__class__.__name__,
+            )
+            if not os.path.exists(results_path):
+                os.makedirs(results_path)
+            with open(
+                results_path + f"/true_{true_goal}_{percentage}_scores.pkl", "wb"
+            ) as scores_file:
+                dill.dump(
+                    [
+                        (str(goal), score)
+                        for (goal, score) in zip(self.active_goals, scores)
+                    ],
+                    scores_file,
+                )
+        div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
+        return str(self.active_goals[true_goal_index])
 
-    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
-        scores = []
-        for problem_name in self.active_problems:
-            agent = self.choose_agent(problem_name)
-            if self.env_prop.gc_adaptable():
-                assert self.__class__.__name__ == "GCDraco", "This recognizer is not compatible with goal conditioned problems."
-                inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
-            score = self.evaluation_function(inf_sequence, agent, self.action_space)
-            scores.append(score)
-        #scores = metrics.softmin(np.array(scores))
-        if self.collect_statistics:
-            results_path = get_gr_as_rl_experiment_confidence_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__)
-            if not os.path.exists(results_path): os.makedirs(results_path)
-            with open(results_path + f'/true_{true_goal}_{percentage}_scores.pkl', 'wb') as scores_file:
-                dill.dump([(str(goal), score) for (goal, score) in zip(self.active_goals, scores)], scores_file)
-        div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
-        return str(self.active_goals[true_goal_index])
-
-    def choose_agent(self, problem_name:str) -> RLAgent:
-        return self.agents[problem_name]
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        return self.agents[problem_name]
 
 
 class Graql(GRAsRL, GaAgentTrainerRecognizer):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        assert not self.env_prop.gc_adaptable() and self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete()
-        if self.rl_agent_type==None: self.rl_agent_type = TabularQLearner
-        self.evaluation_function = kl_divergence_norm_softmax
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            not self.env_prop.gc_adaptable()
+            and self.env_prop.is_state_discrete()
+            and self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = TabularQLearner
+        self.evaluation_function = kl_divergence_norm_softmax
+
 
 class Draco(GRAsRL, GaAgentTrainerRecognizer):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        assert not self.env_prop.is_state_discrete() and not self.env_prop.is_action_discrete()
-        if self.rl_agent_type==None: self.rl_agent_type = DeepRLAgent
-        self.evaluation_function = mean_wasserstein_distance
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = DeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        assert (
+            self.evaluation_function is None
+            or type(self.evaluation_function) != Callable
+        )
+
+
+class GCDraco(
+    GRAsRL, LearningRecognizer, GaAdaptingRecognizer
+):  # TODO problem: it gets 2 goal_adaptation phase from parents, one with configs and one without.
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            self.env_prop.gc_adaptable()
+            and not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = GCDeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        assert (
+            self.evaluation_function is None
+            or type(self.evaluation_function) != Callable
+        )
+
+    def domain_learning_phase(self, base_goals: List[str], train_configs):
+        super().domain_learning_phase(base_goals, train_configs)
+        agent_kwargs = {
+            "domain_name": self.env_prop.domain_name,
+            "problem_name": self.env_prop.name,
+            "algorithm": self.original_train_configs[0][0],
+            "num_timesteps": self.original_train_configs[0][1],
+            "env_prop": self.env_prop,
+        }
+        agent = self.rl_agent_type(**agent_kwargs)
+        agent.learn()
+        self.agents[self.env_prop.name] = agent
+        self.action_space = agent.env.action_space
 
-class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer): # TODO problem: it gets 2 goal_adaptation phase from parents, one with configs and one without.
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        assert self.env_prop.gc_adaptable() and not self.env_prop.is_state_discrete() and not self.env_prop.is_action_discrete()
-        self.evaluation_function = mean_wasserstein_distance
-        if self.rl_agent_type==None: self.rl_agent_type = GCDeepRLAgent
+    # this method currently does nothing but optimizations can be made here.
+    def goals_adaptation_phase(self, dynamic_goals):
+        self.active_goals = dynamic_goals
+        self.active_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
 
-    def domain_learning_phase(self, base_goals: List[str], train_configs):
-        super().domain_learning_phase(base_goals, train_configs)
-        agent_kwargs = {"domain_name": self.env_prop.domain_name,
-                        "problem_name": self.env_prop.name,
-                        "algorithm": self.original_train_configs[0][0],
-                        "num_timesteps": self.original_train_configs[0][1]}
-        agent = self.rl_agent_type(**agent_kwargs)
-        agent.learn()
-        self.agents[self.env_prop.name] = agent
-        self.action_space = agent.env.action_space
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        return next(iter(self.agents.values()))
 
-    # this method currently does nothing but optimizations can be made here.
-    def goals_adaptation_phase(self, dynamic_goals):
-        self.active_goals = dynamic_goals
-        self.active_problems = [self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals]
-
-    def choose_agent(self, problem_name:str) -> RLAgent:
-        return next(iter(self.agents.values()))
-
-    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
-        if not self.env_prop.use_goal_directed_problem():
-            for obs in inf_sequence:
-                obs[0]['desired_goal'] = np.array([self.env_prop.str_to_goal(problem_name)], dtype=obs[0]['desired_goal'].dtype)
-            return inf_sequence
-        return inf_sequence
+    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+        if not self.env_prop.use_goal_directed_problem():
+            for obs in inf_sequence:
+                obs[0]["desired_goal"] = np.array(
+                    [self.env_prop.str_to_goal(problem_name)],
+                    dtype=obs[0]["desired_goal"].dtype,
+                )
+            return inf_sequence
+        return inf_sequence
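The main behavioral change in this file is that Draco and GCDraco no longer hard-code mean_wasserstein_distance; the scoring metric is now read from kwargs.get("evaluation_function"), and the trained agents additionally receive env_prop. A minimal caller-side sketch of the new keyword (everything except evaluation_function is an illustrative placeholder, not taken from this diff):

# Hypothetical usage sketch; constructor arguments other than evaluation_function
# are placeholders and may not match the real Recognizer signature.
from gr_libs.metrics.metrics import mean_wasserstein_distance
from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco

recognizer = Draco(
    domain_name="parking",  # placeholder argument
    evaluation_function=mean_wasserstein_distance,  # reproduces the 0.1.7 hard-coded default
)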
gr_libs/recognizer/graml/gr_dataset.py
@@ -12,123 +12,222 @@ import os
 import dill
 import torch
 
+
 class GRDataset(Dataset):
-    def __init__(self, num_samples, samples):
-        self.num_samples = num_samples
-        self.samples = samples
+    def __init__(self, num_samples, samples):
+        self.num_samples = num_samples
+        self.samples = samples
+
+    def __len__(self):
+        return self.num_samples
 
-    def __len__(self):
-        return self.num_samples
+    def __getitem__(self, idx):
+        return self.samples[
+            idx
+        ]  # returns a tuple - as appended in 'generate_dataset' last line
 
-    def __getitem__(self, idx):
-        return self.samples[idx] # returns a tuple - as appended in 'generate_dataset' last line
 
 def check_diff_goals(first_agent_goal, second_agent_goal):
-    try:
-        assert first_agent_goal != second_agent_goal
-    except Exception as e:
-        try:
-            assert any(first_agent_goal != second_agent_goal)
-        except Exception as e:
-            for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
-                assert any(elm1!=elm2 for elm1, elm2 in zip(arr1, arr2))
+    try:
+        assert first_agent_goal != second_agent_goal
+    except Exception as e:
+        try:
+            assert any(first_agent_goal != second_agent_goal)
+        except Exception as e:
+            for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
+                assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))
+
 
-def generate_datasets(num_samples, agents: List[ContextualAgent], observation_creation_method : MethodType, problems: List[str], env_prop:EnvProperty, recognizer_name:str, gc_goal_set=None):
-    if gc_goal_set: model_name = env_prop.name
-    else: model_name = env_prop.problem_list_to_str_tuple(problems)
-    dataset_directory = get_siamese_dataset_path(domain_name=env_prop.domain_name, env_name=env_prop.name, model_name=model_name, recognizer=recognizer_name)
-    dataset_train_path, dataset_dev_path = os.path.join(dataset_directory, 'train.pkl'), os.path.join(dataset_directory, 'dev.pkl')
-    if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
-        print(f"Loading pre-existing datasets in {dataset_directory}")
-        with open(dataset_train_path, 'rb') as train_file:
-            train_samples = dill.load(train_file)
-        with open(dataset_dev_path, 'rb') as dev_file:
-            dev_samples = dill.load(dev_file)
-    else:
-        print(f"{dataset_directory} doesn't exist, generating datasets")
-        if not os.path.exists(dataset_directory):
-            os.makedirs(dataset_directory)
-        all_samples = []
-        for i in range(num_samples):
-            if gc_goal_set != None: # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
-                assert env_prop.gc_adaptable() == True, "shouldn't specify a goal directed representation if not generating datasets with a general agent."
-                is_same_goal = (np.random.choice([1, 0], 1, p=[1/max(len(gc_goal_set), 6), 1 - 1/max(len(gc_goal_set), 6)]))[0]
-                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                first_random_index = np.random.randint(0, len(gc_goal_set)) # works for lists of every object type, while np.choice only works for 1d arrays
-                first_agent_goal = gc_goal_set[first_random_index] # could be either a real goal or a goal-directed problem name
-                #first_agent_goal = np.random.choice(gc_goal_set)
-                first_trace_percentage = random.choice([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
-                first_observation = []
-                first_agent_kwargs = {
-                    "action_selection_method": observation_creation_method,
-                    "percentage": first_trace_percentage,
-                    "is_consecutive": first_is_consecutive,
-                    "save_fig": False
-                }
-                while first_observation == []:
-                    # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
-                    # but the problem is with the panda because it
-                    if env_prop.use_goal_directed_problem(): first_agent_kwargs["goal_directed_problem"] = first_agent_goal
-                    else: first_agent_kwargs["goal_directed_goal"] = first_agent_goal
-                    first_observation = agents[0].agent.generate_partial_observation(**first_agent_kwargs)
-                first_observation = agents[0].agent.simplify_observation(first_observation)
+def generate_datasets(
+    num_samples,
+    agents: List[ContextualAgent],
+    observation_creation_method: MethodType,
+    problems: List[str],
+    env_prop: EnvProperty,
+    recognizer_name: str,
+    gc_goal_set=None,
+):
+    if gc_goal_set:
+        model_name = env_prop.name
+    else:
+        model_name = env_prop.problem_list_to_str_tuple(problems)
+    dataset_directory = get_siamese_dataset_path(
+        domain_name=env_prop.domain_name,
+        env_name=env_prop.name,
+        model_name=model_name,
+        recognizer=recognizer_name,
+    )
+    dataset_train_path, dataset_dev_path = os.path.join(
+        dataset_directory, "train.pkl"
+    ), os.path.join(dataset_directory, "dev.pkl")
+    if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
+        print(f"Loading pre-existing datasets in {dataset_directory}")
+        with open(dataset_train_path, "rb") as train_file:
+            train_samples = dill.load(train_file)
+        with open(dataset_dev_path, "rb") as dev_file:
+            dev_samples = dill.load(dev_file)
+    else:
+        print(f"{dataset_directory} doesn't exist, generating datasets")
+        if not os.path.exists(dataset_directory):
+            os.makedirs(dataset_directory)
+        all_samples = []
+        for i in range(num_samples):
+            if (
+                gc_goal_set != None
+            ):  # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
+                assert (
+                    env_prop.gc_adaptable() == True
+                ), "shouldn't specify a goal directed representation if not generating datasets with a general agent."
+                is_same_goal = (
+                    np.random.choice(
+                        [1, 0],
+                        1,
+                        p=[
+                            1 / max(len(gc_goal_set), 6),
+                            1 - 1 / max(len(gc_goal_set), 6),
+                        ],
+                    )
+                )[0]
+                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                    0
+                ]
+                first_random_index = np.random.randint(
+                    0, len(gc_goal_set)
+                )  # works for lists of every object type, while np.choice only works for 1d arrays
+                first_agent_goal = gc_goal_set[
+                    first_random_index
+                ]  # could be either a real goal or a goal-directed problem name
+                # first_agent_goal = np.random.choice(gc_goal_set)
+                first_trace_percentage = random.choice(
+                    [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                )
+                first_observation = []
+                first_agent_kwargs = {
+                    "action_selection_method": observation_creation_method,
+                    "percentage": first_trace_percentage,
+                    "is_consecutive": first_is_consecutive,
+                    "save_fig": False,
+                }
+                while first_observation == []:
+                    # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
+                    # but the problem is with the panda because it
+                    if env_prop.use_goal_directed_problem():
+                        first_agent_kwargs["goal_directed_problem"] = first_agent_goal
+                    else:
+                        first_agent_kwargs["goal_directed_goal"] = first_agent_goal
+                    first_observation = agents[0].agent.generate_partial_observation(
+                        **first_agent_kwargs
+                    )
+                first_observation = agents[0].agent.simplify_observation(
+                    first_observation
+                )
 
-                second_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                second_agent_goal = first_agent_goal
-                second_random_index = first_random_index
-                if not is_same_goal:
-                    second_random_index = np.random.choice([i for i in range(len(gc_goal_set)) if i != first_random_index])
-                    assert first_random_index != second_random_index
-                    second_agent_goal = gc_goal_set[second_random_index]
-                if not is_same_goal: check_diff_goals(first_agent_goal, second_agent_goal)
-                second_trace_percentage = first_trace_percentage
-                second_observation = []
-                second_agent_kwargs = {
-                    "action_selection_method": observation_creation_method,
-                    "percentage": second_trace_percentage,
-                    "is_consecutive": second_is_consecutive,
-                    "save_fig": False
-                }
-                while second_observation == []:
-                    if env_prop.use_goal_directed_problem() == True: second_agent_kwargs["goal_directed_problem"] = second_agent_goal
-                    else: second_agent_kwargs["goal_directed_goal"] = second_agent_goal
-                    second_observation = agents[0].agent.generate_partial_observation(**second_agent_kwargs)
-                second_observation = agents[0].agent.simplify_observation(second_observation)
-            else:
-                is_same_goal = (np.random.choice([1, 0], 1, p=[1/max(len(agents), 6), 1 - 1/max(len(agents), 6)]))[0]
-                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                first_agent = np.random.choice(agents)
-                first_trace_percentage = random.choice([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
-                first_observation = first_agent.agent.generate_partial_observation(action_selection_method=observation_creation_method, percentage=first_trace_percentage, is_consecutive=first_is_consecutive, save_fig=False, random_optimalism=True)
-                first_observation = first_agent.agent.simplify_observation(first_observation)
-
-                second_agent = first_agent
-                if not is_same_goal:
-                    second_agent = np.random.choice([agent for agent in agents if agent != first_agent])
-                    assert second_agent != first_agent
-                second_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                second_trace_percentage = first_trace_percentage
-                second_observation = second_agent.agent.generate_partial_observation(action_selection_method=observation_creation_method, percentage=second_trace_percentage, is_consecutive=second_is_consecutive, save_fig=False, random_optimalism=True)
-                second_observation = second_agent.agent.simplify_observation(second_observation)
-            if is_same_goal:
-                observations_distance = measure_average_sequence_distance(first_observation, second_observation) # for debugging mate
-            all_samples.append((
-                [torch.tensor(observation, dtype=torch.float32) for observation in first_observation],
-                [torch.tensor(observation, dtype=torch.float32) for observation in second_observation],
-                torch.tensor(is_same_goal, dtype=torch.float32)))
-            # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
-            if i % 1000 == 0:
-                print(f'generated {i} samples')
+                second_is_consecutive = np.random.choice(
+                    [True, False], 1, p=[0.5, 0.5]
+                )[0]
+                second_agent_goal = first_agent_goal
+                second_random_index = first_random_index
+                if not is_same_goal:
+                    second_random_index = np.random.choice(
+                        [i for i in range(len(gc_goal_set)) if i != first_random_index]
+                    )
+                    assert first_random_index != second_random_index
+                    second_agent_goal = gc_goal_set[second_random_index]
+                if not is_same_goal:
+                    check_diff_goals(first_agent_goal, second_agent_goal)
+                second_trace_percentage = first_trace_percentage
+                second_observation = []
+                second_agent_kwargs = {
+                    "action_selection_method": observation_creation_method,
+                    "percentage": second_trace_percentage,
+                    "is_consecutive": second_is_consecutive,
+                    "save_fig": False,
+                }
+                while second_observation == []:
+                    if env_prop.use_goal_directed_problem() == True:
+                        second_agent_kwargs["goal_directed_problem"] = second_agent_goal
+                    else:
+                        second_agent_kwargs["goal_directed_goal"] = second_agent_goal
+                    second_observation = agents[0].agent.generate_partial_observation(
+                        **second_agent_kwargs
+                    )
+                second_observation = agents[0].agent.simplify_observation(
+                    second_observation
+                )
+            else:
+                is_same_goal = (
+                    np.random.choice(
+                        [1, 0],
+                        1,
+                        p=[1 / max(len(agents), 6), 1 - 1 / max(len(agents), 6)],
+                    )
+                )[0]
+                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                    0
+                ]
+                first_agent = np.random.choice(agents)
+                first_trace_percentage = random.choice(
+                    [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                )
+                first_observation = first_agent.agent.generate_partial_observation(
+                    action_selection_method=observation_creation_method,
+                    percentage=first_trace_percentage,
+                    is_consecutive=first_is_consecutive,
+                    save_fig=False,
+                    random_optimalism=True,
+                )
+                first_observation = first_agent.agent.simplify_observation(
+                    first_observation
+                )
 
-                total_samples = len(all_samples)
-        train_size = int(0.8 * total_samples)
-        train_samples = all_samples[:train_size]
-        dev_samples = all_samples[train_size:]
-        with open(dataset_train_path, 'wb') as train_file:
-            dill.dump(train_samples, train_file)
-        with open(dataset_dev_path, 'wb') as dev_file:
-            dill.dump(dev_samples, dev_file)
+                second_agent = first_agent
+                if not is_same_goal:
+                    second_agent = np.random.choice(
+                        [agent for agent in agents if agent != first_agent]
+                    )
+                    assert second_agent != first_agent
+                second_is_consecutive = np.random.choice(
+                    [True, False], 1, p=[0.5, 0.5]
+                )[0]
+                second_trace_percentage = first_trace_percentage
+                second_observation = second_agent.agent.generate_partial_observation(
+                    action_selection_method=observation_creation_method,
+                    percentage=second_trace_percentage,
+                    is_consecutive=second_is_consecutive,
+                    save_fig=False,
+                    random_optimalism=True,
+                )
+                second_observation = second_agent.agent.simplify_observation(
+                    second_observation
+                )
+            if is_same_goal:
+                observations_distance = measure_average_sequence_distance(
+                    first_observation, second_observation
+                )  # for debugging mate
+            all_samples.append(
+                (
+                    [
+                        torch.tensor(observation, dtype=torch.float32)
+                        for observation in first_observation
+                    ],
+                    [
+                        torch.tensor(observation, dtype=torch.float32)
+                        for observation in second_observation
+                    ],
+                    torch.tensor(is_same_goal, dtype=torch.float32),
+                )
+            )
+            # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
+            if i % 1000 == 0:
+                print(f"generated {i} samples")
 
-    return train_samples, dev_samples
+        total_samples = len(all_samples)
+        train_size = int(0.8 * total_samples)
+        train_samples = all_samples[:train_size]
+        dev_samples = all_samples[train_size:]
+        with open(dataset_train_path, "wb") as train_file:
+            dill.dump(train_samples, train_file)
+        with open(dataset_dev_path, "wb") as dev_file:
+            dill.dump(dev_samples, dev_file)
 
-
+    return train_samples, dev_samples
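Taken together, the reworked generate_datasets still returns (train_samples, dev_samples), where each sample is a pair of tensor sequences plus a same-goal label, and GRDataset is a thin wrapper around such a list. A minimal usage sketch based only on the signatures visible in this hunk (argument values are placeholders; batching the variable-length sequences would additionally need a collate function):

# Hypothetical sketch; all concrete values below are placeholders.
train_samples, dev_samples = generate_datasets(
    num_samples=10000,
    agents=agents,  # List[ContextualAgent], assumed prepared by the caller
    observation_creation_method=method,
    problems=problems,
    env_prop=env_prop,
    recognizer_name="Graml",  # placeholder name
)
train_set = GRDataset(len(train_samples), train_samples)
dev_set = GRDataset(len(dev_samples), dev_samples)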