gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
- evaluation/create_minigrid_map_image.py +10 -6
- evaluation/file_system.py +16 -5
- evaluation/generate_experiments_results.py +123 -74
- evaluation/generate_experiments_results_new_ver1.py +227 -243
- evaluation/generate_experiments_results_new_ver2.py +317 -317
- evaluation/generate_task_specific_statistics_plots.py +481 -253
- evaluation/get_plans_images.py +41 -26
- evaluation/increasing_and_decreasing_.py +97 -56
- gr_libs/__init__.py +2 -1
- gr_libs/_version.py +2 -2
- gr_libs/environment/__init__.py +16 -8
- gr_libs/environment/environment.py +167 -39
- gr_libs/environment/utils/utils.py +22 -12
- gr_libs/metrics/__init__.py +5 -0
- gr_libs/metrics/metrics.py +76 -34
- gr_libs/ml/__init__.py +2 -0
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +1 -1
- gr_libs/ml/base/rl_agent.py +13 -10
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/deep_rl_learner.py +433 -352
- gr_libs/ml/neural/utils/__init__.py +1 -1
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/neural/utils/penv.py +5 -2
- gr_libs/ml/planner/mcts/mcts_model.py +524 -302
- gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
- gr_libs/ml/planner/mcts/utils/node.py +11 -7
- gr_libs/ml/planner/mcts/utils/tree.py +14 -10
- gr_libs/ml/sequential/__init__.py +1 -1
- gr_libs/ml/sequential/lstm_model.py +256 -175
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +123 -73
- gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
- gr_libs/ml/utils/__init__.py +8 -2
- gr_libs/ml/utils/format.py +78 -70
- gr_libs/ml/utils/math.py +2 -1
- gr_libs/ml/utils/other.py +1 -1
- gr_libs/ml/utils/storage.py +88 -28
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
- gr_libs/recognizer/graml/gr_dataset.py +209 -110
- gr_libs/recognizer/graml/graml_recognizer.py +431 -240
- gr_libs/recognizer/recognizer.py +38 -27
- gr_libs/recognizer/utils/__init__.py +1 -1
- gr_libs/recognizer/utils/format.py +8 -3
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
- gr_libs-0.1.8.dist-info/RECORD +70 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +8 -4
- tests/test_graql.py +2 -1
- tutorials/gcdraco_panda_tutorial.py +66 -0
- tutorials/gcdraco_parking_tutorial.py +61 -0
- tutorials/graml_minigrid_tutorial.py +42 -12
- tutorials/graml_panda_tutorial.py +35 -14
- tutorials/graml_parking_tutorial.py +37 -20
- tutorials/graml_point_maze_tutorial.py +33 -13
- tutorials/graql_minigrid_tutorial.py +31 -15
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -1,102 +1,167 @@
 from abc import abstractmethod
 import os
 import dill
-from typing import List, Type
+from typing import List, Type, Callable
 import numpy as np
 from gr_libs.environment.environment import EnvProperty, GCEnvProperty
 from gr_libs.environment.utils.utils import domain_to_env_property
-from gr_libs.metrics.metrics import kl_divergence_norm_softmax, mean_wasserstein_distance
+from gr_libs.metrics.metrics import (
+    kl_divergence_norm_softmax,
+    mean_wasserstein_distance,
+)
 from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
-from gr_libs.recognizer.recognizer import GaAdaptingRecognizer, GaAgentTrainerRecognizer, LearningRecognizer, Recognizer
+from gr_libs.recognizer.recognizer import (
+    GaAdaptingRecognizer,
+    GaAgentTrainerRecognizer,
+    LearningRecognizer,
+    Recognizer,
+)
+

 class GRAsRL(Recognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.agents = {}  # consider changing to ContextualAgent
+
+    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+        super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
+        dynamic_goals_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
+        self.active_goals = dynamic_goals
+        self.active_problems = dynamic_goals_problems
+        for problem_name, config in zip(dynamic_goals_problems, dynamic_train_configs):
+            agent_kwargs = {
+                "domain_name": self.env_prop.domain_name,
+                "problem_name": problem_name,
+                "env_prop": self.env_prop,
+            }
+            if config[0]:
+                agent_kwargs["algorithm"] = config[0]
+            if config[1]:
+                agent_kwargs["num_timesteps"] = config[1]
+            agent = self.rl_agent_type(**agent_kwargs)
+            agent.learn()
+            self.agents[problem_name] = agent
+        self.action_space = next(iter(self.agents.values())).env.action_space

+    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        scores = []
+        for problem_name in self.active_problems:
+            agent = self.choose_agent(problem_name)
+            if self.env_prop.gc_adaptable():
+                assert (
+                    self.__class__.__name__ == "GCDraco"
+                ), "This recognizer is not compatible with goal conditioned problems."
+                inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
+            score = self.evaluation_function(inf_sequence, agent, self.action_space)
+            scores.append(score)
+        # scores = metrics.softmin(np.array(scores))
+        if self.collect_statistics:
+            results_path = get_gr_as_rl_experiment_confidence_path(
+                domain_name=self.env_prop.domain_name,
+                env_name=self.env_prop.name,
+                recognizer=self.__class__.__name__,
+            )
+            if not os.path.exists(results_path):
+                os.makedirs(results_path)
+            with open(
+                results_path + f"/true_{true_goal}_{percentage}_scores.pkl", "wb"
+            ) as scores_file:
+                dill.dump(
+                    [
+                        (str(goal), score)
+                        for (goal, score) in zip(self.active_goals, scores)
+                    ],
+                    scores_file,
+                )
+        div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
+        return str(self.active_goals[true_goal_index])

-        for problem_name in self.active_problems:
-            agent = self.choose_agent(problem_name)
-            if self.env_prop.gc_adaptable():
-                assert self.__class__.__name__ == "GCDraco", "This recognizer is not compatible with goal conditioned problems."
-                inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
-            score = self.evaluation_function(inf_sequence, agent, self.action_space)
-            scores.append(score)
-        #scores = metrics.softmin(np.array(scores))
-        if self.collect_statistics:
-            results_path = get_gr_as_rl_experiment_confidence_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__)
-            if not os.path.exists(results_path): os.makedirs(results_path)
-            with open(results_path + f'/true_{true_goal}_{percentage}_scores.pkl', 'wb') as scores_file:
-                dill.dump([(str(goal), score) for (goal, score) in zip(self.active_goals, scores)], scores_file)
-        div, true_goal_index = min((div, goal) for (goal, div) in enumerate(scores))
-        return str(self.active_goals[true_goal_index])
-
-    def choose_agent(self, problem_name:str) -> RLAgent:
-        return self.agents[problem_name]
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        return self.agents[problem_name]


 class Graql(GRAsRL, GaAgentTrainerRecognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            not self.env_prop.gc_adaptable()
+            and self.env_prop.is_state_discrete()
+            and self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = TabularQLearner
+        self.evaluation_function = kl_divergence_norm_softmax
+

 class Draco(GRAsRL, GaAgentTrainerRecognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = DeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        assert (
+            self.evaluation_function is None
+            or type(self.evaluation_function) != Callable
+        )
+
+
+class GCDraco(
+    GRAsRL, LearningRecognizer, GaAdaptingRecognizer
+):  # TODO problem: it gets 2 goal_adaptation phase from parents, one with configs and one without.
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            self.env_prop.gc_adaptable()
+            and not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type == None:
+            self.rl_agent_type = GCDeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        assert (
+            self.evaluation_function is None
+            or type(self.evaluation_function) != Callable
+        )
+
+    def domain_learning_phase(self, base_goals: List[str], train_configs):
+        super().domain_learning_phase(base_goals, train_configs)
+        agent_kwargs = {
+            "domain_name": self.env_prop.domain_name,
+            "problem_name": self.env_prop.name,
+            "algorithm": self.original_train_configs[0][0],
+            "num_timesteps": self.original_train_configs[0][1],
+            "env_prop": self.env_prop,
+        }
+        agent = self.rl_agent_type(**agent_kwargs)
+        agent.learn()
+        self.agents[self.env_prop.name] = agent
+        self.action_space = agent.env.action_space

+    # this method currently does nothing but optimizations can be made here.
+    def goals_adaptation_phase(self, dynamic_goals):
+        self.active_goals = dynamic_goals
+        self.active_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]

-        agent_kwargs = {"domain_name": self.env_prop.domain_name,
-                        "problem_name": self.env_prop.name,
-                        "algorithm": self.original_train_configs[0][0],
-                        "num_timesteps": self.original_train_configs[0][1]}
-        agent = self.rl_agent_type(**agent_kwargs)
-        agent.learn()
-        self.agents[self.env_prop.name] = agent
-        self.action_space = agent.env.action_space
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        return next(iter(self.agents.values()))

-        if not self.env_prop.use_goal_directed_problem():
-            for obs in inf_sequence:
-                obs[0]['desired_goal'] = np.array([self.env_prop.str_to_goal(problem_name)], dtype=obs[0]['desired_goal'].dtype)
-            return inf_sequence
-        return inf_sequence
+    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+        if not self.env_prop.use_goal_directed_problem():
+            for obs in inf_sequence:
+                obs[0]["desired_goal"] = np.array(
+                    [self.env_prop.str_to_goal(problem_name)],
+                    dtype=obs[0]["desired_goal"].dtype,
+                )
+            return inf_sequence
+        return inf_sequence
gr_libs/recognizer/graml/gr_dataset.py
@@ -12,123 +12,222 @@ import os
 import dill
 import torch

+
 class GRDataset(Dataset):
+    def __init__(self, num_samples, samples):
+        self.num_samples = num_samples
+        self.samples = samples
+
+    def __len__(self):
+        return self.num_samples

+    def __getitem__(self, idx):
+        return self.samples[
+            idx
+        ]  # returns a tuple - as appended in 'generate_dataset' last line

-    def __getitem__(self, idx):
-        return self.samples[idx] # returns a tuple - as appended in 'generate_dataset' last line

 def check_diff_goals(first_agent_goal, second_agent_goal):
+    try:
+        assert first_agent_goal != second_agent_goal
+    except Exception as e:
+        try:
+            assert any(first_agent_goal != second_agent_goal)
+        except Exception as e:
+            for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
+                assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))
+

-def generate_datasets(
+def generate_datasets(
+    num_samples,
+    agents: List[ContextualAgent],
+    observation_creation_method: MethodType,
+    problems: List[str],
+    env_prop: EnvProperty,
+    recognizer_name: str,
+    gc_goal_set=None,
+):
+    if gc_goal_set:
+        model_name = env_prop.name
+    else:
+        model_name = env_prop.problem_list_to_str_tuple(problems)
+    dataset_directory = get_siamese_dataset_path(
+        domain_name=env_prop.domain_name,
+        env_name=env_prop.name,
+        model_name=model_name,
+        recognizer=recognizer_name,
+    )
+    dataset_train_path, dataset_dev_path = os.path.join(
+        dataset_directory, "train.pkl"
+    ), os.path.join(dataset_directory, "dev.pkl")
+    if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
+        print(f"Loading pre-existing datasets in {dataset_directory}")
+        with open(dataset_train_path, "rb") as train_file:
+            train_samples = dill.load(train_file)
+        with open(dataset_dev_path, "rb") as dev_file:
+            dev_samples = dill.load(dev_file)
+    else:
+        print(f"{dataset_directory} doesn't exist, generating datasets")
+        if not os.path.exists(dataset_directory):
+            os.makedirs(dataset_directory)
+        all_samples = []
+        for i in range(num_samples):
+            if (
+                gc_goal_set != None
+            ):  # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
+                assert (
+                    env_prop.gc_adaptable() == True
+                ), "shouldn't specify a goal directed representation if not generating datasets with a general agent."
+                is_same_goal = (
+                    np.random.choice(
+                        [1, 0],
+                        1,
+                        p=[
+                            1 / max(len(gc_goal_set), 6),
+                            1 - 1 / max(len(gc_goal_set), 6),
+                        ],
+                    )
+                )[0]
+                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                    0
+                ]
+                first_random_index = np.random.randint(
+                    0, len(gc_goal_set)
+                )  # works for lists of every object type, while np.choice only works for 1d arrays
+                first_agent_goal = gc_goal_set[
+                    first_random_index
+                ]  # could be either a real goal or a goal-directed problem name
+                # first_agent_goal = np.random.choice(gc_goal_set)
+                first_trace_percentage = random.choice(
+                    [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                )
+                first_observation = []
+                first_agent_kwargs = {
+                    "action_selection_method": observation_creation_method,
+                    "percentage": first_trace_percentage,
+                    "is_consecutive": first_is_consecutive,
+                    "save_fig": False,
+                }
+                while first_observation == []:
+                    # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
+                    # but the problem is with the panda because it
+                    if env_prop.use_goal_directed_problem():
+                        first_agent_kwargs["goal_directed_problem"] = first_agent_goal
+                    else:
+                        first_agent_kwargs["goal_directed_goal"] = first_agent_goal
+                    first_observation = agents[0].agent.generate_partial_observation(
+                        **first_agent_kwargs
+                    )
+                first_observation = agents[0].agent.simplify_observation(
+                    first_observation
+                )

+                second_is_consecutive = np.random.choice(
+                    [True, False], 1, p=[0.5, 0.5]
+                )[0]
+                second_agent_goal = first_agent_goal
+                second_random_index = first_random_index
+                if not is_same_goal:
+                    second_random_index = np.random.choice(
+                        [i for i in range(len(gc_goal_set)) if i != first_random_index]
+                    )
+                    assert first_random_index != second_random_index
+                second_agent_goal = gc_goal_set[second_random_index]
+                if not is_same_goal:
+                    check_diff_goals(first_agent_goal, second_agent_goal)
+                second_trace_percentage = first_trace_percentage
+                second_observation = []
+                second_agent_kwargs = {
+                    "action_selection_method": observation_creation_method,
+                    "percentage": second_trace_percentage,
+                    "is_consecutive": second_is_consecutive,
+                    "save_fig": False,
+                }
+                while second_observation == []:
+                    if env_prop.use_goal_directed_problem() == True:
+                        second_agent_kwargs["goal_directed_problem"] = second_agent_goal
+                    else:
+                        second_agent_kwargs["goal_directed_goal"] = second_agent_goal
+                    second_observation = agents[0].agent.generate_partial_observation(
+                        **second_agent_kwargs
+                    )
+                second_observation = agents[0].agent.simplify_observation(
+                    second_observation
+                )
+            else:
+                is_same_goal = (
+                    np.random.choice(
+                        [1, 0],
+                        1,
+                        p=[1 / max(len(agents), 6), 1 - 1 / max(len(agents), 6)],
+                    )
+                )[0]
+                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[
+                    0
+                ]
+                first_agent = np.random.choice(agents)
+                first_trace_percentage = random.choice(
+                    [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+                )
+                first_observation = first_agent.agent.generate_partial_observation(
+                    action_selection_method=observation_creation_method,
+                    percentage=first_trace_percentage,
+                    is_consecutive=first_is_consecutive,
+                    save_fig=False,
+                    random_optimalism=True,
+                )
+                first_observation = first_agent.agent.simplify_observation(
+                    first_observation
+                )

+                second_agent = first_agent
+                if not is_same_goal:
+                    second_agent = np.random.choice(
+                        [agent for agent in agents if agent != first_agent]
+                    )
+                    assert second_agent != first_agent
+                second_is_consecutive = np.random.choice(
+                    [True, False], 1, p=[0.5, 0.5]
+                )[0]
+                second_trace_percentage = first_trace_percentage
+                second_observation = second_agent.agent.generate_partial_observation(
+                    action_selection_method=observation_creation_method,
+                    percentage=second_trace_percentage,
+                    is_consecutive=second_is_consecutive,
+                    save_fig=False,
+                    random_optimalism=True,
+                )
+                second_observation = second_agent.agent.simplify_observation(
+                    second_observation
+                )
+            if is_same_goal:
+                observations_distance = measure_average_sequence_distance(
+                    first_observation, second_observation
+                )  # for debugging mate
+            all_samples.append(
+                (
+                    [
+                        torch.tensor(observation, dtype=torch.float32)
+                        for observation in first_observation
+                    ],
+                    [
+                        torch.tensor(observation, dtype=torch.float32)
+                        for observation in second_observation
+                    ],
+                    torch.tensor(is_same_goal, dtype=torch.float32),
+                )
+            )
+            # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
+            if i % 1000 == 0:
+                print(f"generated {i} samples")

+        total_samples = len(all_samples)
+        train_size = int(0.8 * total_samples)
+        train_samples = all_samples[:train_size]
+        dev_samples = all_samples[train_size:]
+        with open(dataset_train_path, "wb") as train_file:
+            dill.dump(train_samples, train_file)
+        with open(dataset_dev_path, "wb") as dev_file:
+            dill.dump(dev_samples, dev_file)

+    return train_samples, dev_samples