gr-libs 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +277 -0
  2. evaluation/create_minigrid_map_image.py +34 -0
  3. evaluation/file_system.py +42 -0
  4. evaluation/generate_experiments_results.py +92 -0
  5. evaluation/generate_experiments_results_new_ver1.py +254 -0
  6. evaluation/generate_experiments_results_new_ver2.py +331 -0
  7. evaluation/generate_task_specific_statistics_plots.py +272 -0
  8. evaluation/get_plans_images.py +47 -0
  9. evaluation/increasing_and_decreasing_.py +63 -0
  10. gr_libs/__init__.py +2 -0
  11. gr_libs/environment/__init__.py +0 -0
  12. gr_libs/environment/environment.py +227 -0
  13. gr_libs/environment/utils/__init__.py +0 -0
  14. gr_libs/environment/utils/utils.py +17 -0
  15. gr_libs/metrics/__init__.py +0 -0
  16. gr_libs/metrics/metrics.py +224 -0
  17. gr_libs/ml/__init__.py +6 -0
  18. gr_libs/ml/agent.py +56 -0
  19. gr_libs/ml/base/__init__.py +1 -0
  20. gr_libs/ml/base/rl_agent.py +54 -0
  21. gr_libs/ml/consts.py +22 -0
  22. gr_libs/ml/neural/__init__.py +3 -0
  23. gr_libs/ml/neural/deep_rl_learner.py +395 -0
  24. gr_libs/ml/neural/utils/__init__.py +2 -0
  25. gr_libs/ml/neural/utils/dictlist.py +33 -0
  26. gr_libs/ml/neural/utils/penv.py +57 -0
  27. gr_libs/ml/planner/__init__.py +0 -0
  28. gr_libs/ml/planner/mcts/__init__.py +0 -0
  29. gr_libs/ml/planner/mcts/mcts_model.py +330 -0
  30. gr_libs/ml/planner/mcts/utils/__init__.py +2 -0
  31. gr_libs/ml/planner/mcts/utils/node.py +33 -0
  32. gr_libs/ml/planner/mcts/utils/tree.py +102 -0
  33. gr_libs/ml/sequential/__init__.py +1 -0
  34. gr_libs/ml/sequential/lstm_model.py +192 -0
  35. gr_libs/ml/tabular/__init__.py +3 -0
  36. gr_libs/ml/tabular/state.py +21 -0
  37. gr_libs/ml/tabular/tabular_q_learner.py +453 -0
  38. gr_libs/ml/tabular/tabular_rl_agent.py +126 -0
  39. gr_libs/ml/utils/__init__.py +6 -0
  40. gr_libs/ml/utils/env.py +7 -0
  41. gr_libs/ml/utils/format.py +100 -0
  42. gr_libs/ml/utils/math.py +13 -0
  43. gr_libs/ml/utils/other.py +24 -0
  44. gr_libs/ml/utils/storage.py +127 -0
  45. gr_libs/recognizer/__init__.py +0 -0
  46. gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
  47. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +102 -0
  48. gr_libs/recognizer/graml/__init__.py +0 -0
  49. gr_libs/recognizer/graml/gr_dataset.py +134 -0
  50. gr_libs/recognizer/graml/graml_recognizer.py +266 -0
  51. gr_libs/recognizer/recognizer.py +46 -0
  52. gr_libs/recognizer/utils/__init__.py +1 -0
  53. gr_libs/recognizer/utils/format.py +13 -0
  54. gr_libs-0.1.3.dist-info/METADATA +197 -0
  55. gr_libs-0.1.3.dist-info/RECORD +62 -0
  56. gr_libs-0.1.3.dist-info/WHEEL +5 -0
  57. gr_libs-0.1.3.dist-info/top_level.txt +3 -0
  58. tutorials/graml_minigrid_tutorial.py +30 -0
  59. tutorials/graml_panda_tutorial.py +32 -0
  60. tutorials/graml_parking_tutorial.py +38 -0
  61. tutorials/graml_point_maze_tutorial.py +43 -0
  62. tutorials/graql_minigrid_tutorial.py +29 -0
gr_libs/ml/utils/format.py
@@ -0,0 +1,100 @@
+ import random
+ import re
+
+ import numpy
+ import torch
+ import gymnasium as gym
+
+ import gr_libs.ml as ml
+
+
+ def get_obss_preprocessor(obs_space):
+     # Check if obs_space is a plain image space
+     if isinstance(obs_space, gym.spaces.Box):
+         obs_space = {"image": obs_space.shape}
+
+         def preprocess_obss(obss, device=None):
+             return ml.DictList({
+                 "image": preprocess_images(obss, device=device)
+             })
+
+     # Check if it is a MiniGrid observation space (image + mission text)
+     elif isinstance(obs_space, gym.spaces.Dict) and "image" in obs_space.spaces:
+         obs_space = {"image": obs_space.spaces["image"].shape, "text": 100}
+
+         vocab = Vocabulary(obs_space["text"])
+
+         def preprocess_obss(obss, device=None):
+             return ml.DictList({
+                 "image": preprocess_images([obs["image"] for obs in obss], device=device),
+                 "text": preprocess_texts([obs["mission"] for obs in obss], vocab, device=device)
+             })
+
+         preprocess_obss.vocab = vocab
+
+     # Check if it is a goal-conditioned Dict space with an "observation" key
+     elif isinstance(obs_space, gym.spaces.Dict) and "observation" in obs_space.spaces:
+         obs_space = {"observation": obs_space.spaces["observation"].shape}
+
+         def preprocess_obss(obss, device=None):
+             return ml.DictList({
+                 "observation": preprocess_images(obss, device=device)
+             })
+
+     else:
+         raise ValueError("Unknown observation space: " + str(obs_space))
+
+     return obs_space, preprocess_obss
+
+
+ def preprocess_images(images, device=None):
+     # PyTorch is very slow at building a tensor from a list of arrays,
+     # so convert to a single numpy array first.
+     images = numpy.array(images)
+     return torch.tensor(images, device=device, dtype=torch.float)
+
+
+ def random_subset_with_order(sequence, subset_size, is_consecutive=True):
+     """Return `subset_size` elements of `sequence`, preserving their original order."""
+     if subset_size >= len(sequence):
+         return sequence
+     if is_consecutive:
+         indices_to_select = list(range(subset_size))
+     else:
+         # Randomly select which indices to keep, then sort to preserve order.
+         indices_to_select = sorted(random.sample(range(len(sequence)), subset_size))
+     return [sequence[i] for i in indices_to_select]
+
+
+ def preprocess_texts(texts, vocab, device=None):
+     var_indexed_texts = []
+     max_text_len = 0
+
+     for text in texts:
+         tokens = re.findall("([a-z]+)", text.lower())
+         var_indexed_text = numpy.array([vocab[token] for token in tokens])
+         var_indexed_texts.append(var_indexed_text)
+         max_text_len = max(len(var_indexed_text), max_text_len)
+
+     # Pad every tokenized text with zeros up to the longest text in the batch.
+     indexed_texts = numpy.zeros((len(texts), max_text_len))
+
+     for i, indexed_text in enumerate(var_indexed_texts):
+         indexed_texts[i, :len(indexed_text)] = indexed_text
+
+     return torch.tensor(indexed_texts, device=device, dtype=torch.long)
+
+
+ class Vocabulary:
+     """A mapping from tokens to ids with a capacity of `max_size` words.
+     It can be saved in a `vocab.json` file."""
+
+     def __init__(self, max_size):
+         self.max_size = max_size
+         self.vocab = {}
+
+     def load_vocab(self, vocab):
+         self.vocab = vocab
+
+     def __getitem__(self, token):
+         if token not in self.vocab:
+             if len(self.vocab) >= self.max_size:
+                 raise ValueError("Maximum vocabulary capacity reached")
+             self.vocab[token] = len(self.vocab) + 1
+         return self.vocab[token]
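A minimal usage sketch (not shipped with the package), assuming `DictList` is exported from `gr_libs.ml` as the module's own import suggests: feeding a plain image observation space through `get_obss_preprocessor` and batching a few raw observations.

    import gymnasium as gym
    import numpy as np
    from gr_libs.ml.utils.format import get_obss_preprocessor

    # Hypothetical 7x7 RGB image space, just for illustration.
    obs_space = gym.spaces.Box(low=0, high=255, shape=(7, 7, 3), dtype=np.uint8)
    shapes, preprocess_obss = get_obss_preprocessor(obs_space)
    # shapes is now {"image": (7, 7, 3)}

    batch = [obs_space.sample() for _ in range(4)]
    tensors = preprocess_obss(batch, device="cpu")
    # tensors.image is a float tensor of shape (4, 7, 7, 3)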
gr_libs/ml/utils/math.py
@@ -0,0 +1,13 @@
+ import math
+ from typing import List
+
+
+ def softmax(values: List[float]) -> List[float]:
+     """Compute softmax probabilities for a list of values.
+
+     TODO: we should probably use numpy arrays here.
+
+     Args:
+         values (List[float]): input values for which to compute softmax
+
+     Returns:
+         List[float]: softmax probabilities
+     """
+     total = sum(math.exp(q) for q in values)
+     return [math.exp(q) / total for q in values]
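A quick check of the helper above (illustration only): softmax(q)_i = exp(q_i) / sum_j exp(q_j), so the outputs are positive and sum to 1.

    from gr_libs.ml.utils.math import softmax

    probs = softmax([1.0, 2.0, 3.0])
    print(probs)       # ~ [0.0900, 0.2447, 0.6652]
    print(sum(probs))  # 1.0, up to floating-point error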
gr_libs/ml/utils/other.py
@@ -0,0 +1,24 @@
+ import collections
+ import random
+
+ import numpy
+ import torch
+
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+ def seed(seed):
+     """Seed all random number generators used by the library."""
+     random.seed(seed)
+     numpy.random.seed(seed)
+     torch.manual_seed(seed)
+     if torch.cuda.is_available():
+         torch.cuda.manual_seed_all(seed)
+
+
+ def synthesize(array):
+     """Summarize an array of values as an ordered dict of basic statistics."""
+     d = collections.OrderedDict()
+     d["mean"] = numpy.mean(array)
+     d["std"] = numpy.std(array)
+     d["min"] = numpy.amin(array)
+     d["max"] = numpy.amax(array)
+     return d
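Hypothetical usage of the two helpers above: seed every RNG once, then summarize a list of episode returns.

    from gr_libs.ml.utils.other import seed, synthesize

    seed(42)
    stats = synthesize([1.0, 2.0, 3.0, 4.0])
    print(stats)  # OrderedDict([('mean', 2.5), ('std', 1.118...), ('min', 1.0), ('max', 4.0)])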
gr_libs/ml/utils/storage.py
@@ -0,0 +1,127 @@
+ import csv
+ import logging
+ import os
+ import sys
+
+ import torch
+
+ from .other import device
+
+
+ def create_folders_if_necessary(path):
+     if not os.path.exists(path):
+         os.makedirs(path)
+
+
+ def get_storage_framework_dir(recognizer: str):
+     return os.path.join(get_storage_dir(), recognizer)
+
+ def get_storage_dir():
+     return "dataset"
+
+ def _get_models_directory_name():
+     return "models"
+
+ def _get_siamese_datasets_directory_name():
+     return "siamese_datasets"
+
+ def _get_observations_directory_name():
+     return "observations"
+
+ def get_observation_file_name(observability_percentage: float):
+     return 'obs' + str(observability_percentage) + '.pkl'
+
+ def get_domain_dir(domain_name, recognizer: str):
+     return os.path.join(get_storage_framework_dir(recognizer), domain_name)
+
+ def get_env_dir(domain_name, env_name, recognizer: str):
+     return os.path.join(get_domain_dir(domain_name, recognizer), env_name)
+
+ def get_observations_dir(domain_name, env_name, recognizer: str):
+     return os.path.join(get_env_dir(domain_name=domain_name, env_name=env_name, recognizer=recognizer), _get_observations_directory_name())
+
+ def get_agent_model_dir(domain_name, model_name, class_name):
+     return os.path.join(get_storage_dir(), _get_models_directory_name(), domain_name, model_name, class_name)
+
+ def get_lstm_model_dir(domain_name, env_name, model_name, recognizer: str):
+     return os.path.join(get_env_dir(domain_name=domain_name, env_name=env_name, recognizer=recognizer), model_name)
+
+ def get_models_dir(domain_name, env_name, recognizer: str):
+     return os.path.join(get_env_dir(domain_name=domain_name, env_name=env_name, recognizer=recognizer), _get_models_directory_name())
+
+ ### GRAML PATHS ###
+
+ def get_siamese_dataset_path(domain_name, env_name, model_name, recognizer: str):
+     return os.path.join(get_lstm_model_dir(domain_name, env_name, model_name, recognizer), _get_siamese_datasets_directory_name())
+
+ def get_embeddings_result_path(domain_name, env_name, recognizer: str):
+     return os.path.join(get_env_dir(domain_name, env_name=env_name, recognizer=recognizer), "goal_embeddings")
+
+ def get_and_create(path):
+     create_folders_if_necessary(path)
+     return path
+
+ def get_experiment_results_path(domain, env_name, task, recognizer: str):
+     return os.path.join(get_env_dir(domain, env_name=env_name, recognizer=recognizer), "experiment_results", env_name, task, "experiment_results")
+
+ def get_plans_result_path(domain_name, env_name, recognizer: str):
+     return os.path.join(get_env_dir(domain_name, env_name=env_name, recognizer=recognizer), "plans")
+
+ def get_policy_sequences_result_path(domain_name, env_name, recognizer: str):
+     return os.path.join(get_env_dir(domain_name, env_name, recognizer=recognizer), "policy_sequences")
+
+ ### END GRAML PATHS ###
+
+ ### GRAQL PATHS ###
+
+ def get_gr_as_rl_experiment_confidence_path(domain_name, env_name, recognizer: str):
+     return os.path.join(get_env_dir(domain_name=domain_name, env_name=env_name, recognizer=recognizer), "experiments")
+
+ ### END GRAQL PATHS ###
+
+ def get_status_path(model_dir):
+     return os.path.join(model_dir, "status.pt")
+
+
+ def get_status(model_dir):
+     path = get_status_path(model_dir)
+     return torch.load(path, map_location=device)
+
+
+ def save_status(status, model_dir):
+     path = get_status_path(model_dir)
+     # Create the parent directory of the status file, not the file path itself.
+     create_folders_if_necessary(os.path.dirname(path))
+     torch.save(status, path)
+
+
+ def get_vocab(model_dir):
+     return get_status(model_dir)["vocab"]
+
+
+ def get_model_state(model_dir):
+     return get_status(model_dir)["model_state"]
+
+
+ def get_txt_logger(model_dir):
+     path = os.path.join(model_dir, "log.txt")
+     create_folders_if_necessary(os.path.dirname(path))
+
+     logging.basicConfig(
+         level=logging.INFO,
+         format="%(message)s",
+         handlers=[
+             logging.FileHandler(filename=path),
+             logging.StreamHandler(sys.stdout)
+         ]
+     )
+
+     return logging.getLogger()
+
+
+ def get_csv_logger(model_dir):
+     csv_path = os.path.join(model_dir, "log.csv")
+     create_folders_if_necessary(os.path.dirname(csv_path))
+     csv_file = open(csv_path, "a")
+     return csv_file, csv.writer(csv_file)
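Illustration only of how the path helpers above compose under the default storage root "dataset". The domain, environment, and recognizer names here are made up for the example.

    from gr_libs.ml.utils.storage import get_and_create, get_observations_dir

    obs_dir = get_and_create(
        get_observations_dir("minigrid", "MiniGrid-SimpleCrossingS13N4", recognizer="Graql")
    )
    # obs_dir == "dataset/Graql/minigrid/MiniGrid-SimpleCrossingS13N4/observations"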
File without changes
File without changes
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -0,0 +1,102 @@
+ from abc import abstractmethod
+ import os
+ from typing import List, Type
+
+ import dill
+ import numpy as np
+
+ from gr_libs.environment.environment import EnvProperty, GCEnvProperty
+ from gr_libs.environment.utils.utils import domain_to_env_property
+ from gr_libs.metrics.metrics import kl_divergence_norm_softmax, mean_wasserstein_distance
+ from gr_libs.ml.base import RLAgent
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
+ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+ from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
+ from gr_libs.recognizer.recognizer import GaAdaptingRecognizer, GaAgentTrainerRecognizer, LearningRecognizer, Recognizer
+
+
+ class GRAsRL(Recognizer):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.agents = {}  # consider changing to ContextualAgent
+
+     def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+         super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
+         dynamic_goals_problems = [self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals]
+         self.active_goals = dynamic_goals
+         self.active_problems = dynamic_goals_problems
+         # Train one RL agent per dynamic goal.
+         for problem_name, config in zip(dynamic_goals_problems, dynamic_train_configs):
+             agent_kwargs = {"domain_name": self.env_prop.domain_name,
+                             "problem_name": problem_name}
+             if config[0]: agent_kwargs["algorithm"] = config[0]
+             if config[1]: agent_kwargs["num_timesteps"] = config[1]
+             agent = self.rl_agent_type(**agent_kwargs)
+             agent.learn()
+             self.agents[problem_name] = agent
+         self.action_space = next(iter(self.agents.values())).env.action_space
+
+     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+         # Score the observed sequence against every candidate goal's agent;
+         # a lower divergence means the sequence better matches that goal's policy.
+         scores = []
+         for problem_name in self.active_problems:
+             agent = self.choose_agent(problem_name)
+             if self.env_prop.gc_adaptable():
+                 assert self.__class__.__name__ == "GCDraco", "This recognizer is not compatible with goal conditioned problems."
+                 inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
+             score = self.evaluation_function(inf_sequence, agent, self.action_space)
+             scores.append(score)
+         # scores = metrics.softmin(np.array(scores))
+         if self.collect_statistics:
+             results_path = get_gr_as_rl_experiment_confidence_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__)
+             if not os.path.exists(results_path): os.makedirs(results_path)
+             with open(results_path + f'/true_{true_goal}_{percentage}_scores.pkl', 'wb') as scores_file:
+                 dill.dump([(str(goal), score) for (goal, score) in zip(self.active_goals, scores)], scores_file)
+         # Pick the goal whose agent yields the minimal divergence.
+         min_score, true_goal_index = min((score, idx) for idx, score in enumerate(scores))
+         return str(self.active_goals[true_goal_index])
+
+     def choose_agent(self, problem_name: str) -> RLAgent:
+         return self.agents[problem_name]
+
+
+ class Graql(GRAsRL, GaAgentTrainerRecognizer):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         assert not self.env_prop.gc_adaptable() and self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete()
+         if self.rl_agent_type is None: self.rl_agent_type = TabularQLearner
+         self.evaluation_function = kl_divergence_norm_softmax
+
+
+ class Draco(GRAsRL, GaAgentTrainerRecognizer):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         assert not self.env_prop.is_state_discrete() and not self.env_prop.is_action_discrete()
+         if self.rl_agent_type is None: self.rl_agent_type = DeepRLAgent
+         self.evaluation_function = mean_wasserstein_distance
+
+
+ class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):  # TODO problem: it inherits two goals_adaptation_phase methods from its parents, one with configs and one without.
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         assert self.env_prop.gc_adaptable() and not self.env_prop.is_state_discrete() and not self.env_prop.is_action_discrete()
+         self.evaluation_function = mean_wasserstein_distance
+         if self.rl_agent_type is None: self.rl_agent_type = GCDeepRLAgent
+
+     def domain_learning_phase(self, base_goals: List[str], train_configs):
+         super().domain_learning_phase(base_goals, train_configs)
+         agent_kwargs = {"domain_name": self.env_prop.domain_name,
+                         "problem_name": self.env_prop.name,
+                         "algorithm": self.original_train_configs[0][0],
+                         "num_timesteps": self.original_train_configs[0][1]}
+         agent = self.rl_agent_type(**agent_kwargs)
+         agent.learn()
+         self.agents[self.env_prop.name] = agent
+         self.action_space = agent.env.action_space
+
+     # This method currently does nothing beyond bookkeeping, but optimizations can be made here.
+     def goals_adaptation_phase(self, dynamic_goals):
+         self.active_goals = dynamic_goals
+         self.active_problems = [self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals]
+
+     def choose_agent(self, problem_name: str) -> RLAgent:
+         # A single goal-conditioned agent serves every candidate problem.
+         return next(iter(self.agents.values()))
+
+     def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+         if not self.env_prop.use_goal_directed_problem():
+             # Overwrite the desired goal in every observation with the candidate goal.
+             for obs in inf_sequence:
+                 obs[0]['desired_goal'] = np.array([self.env_prop.str_to_goal(problem_name)], dtype=obs[0]['desired_goal'].dtype)
+         return inf_sequence
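A standalone sketch of the selection rule used in GRAsRL.inference_phase above, with made-up goal names and scores: each candidate goal's agent assigns a divergence score to the observed sequence, and the goal with the smallest score is returned.

    active_goals = ["goal_A", "goal_B", "goal_C"]
    scores = [0.42, 0.17, 0.55]  # e.g. KL divergence (Graql) or Wasserstein distance (Draco/GCDraco)

    min_score, true_goal_index = min((score, idx) for idx, score in enumerate(scores))
    print(active_goals[true_goal_index])  # -> "goal_B"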
File without changes
gr_libs/recognizer/graml/gr_dataset.py
@@ -0,0 +1,134 @@
+ import os
+ import random
+ from types import MethodType
+ from typing import List
+
+ import dill
+ import numpy as np
+ import torch
+ from torch.utils.data import Dataset
+
+ from gr_libs.environment.environment import EnvProperty
+ from gr_libs.metrics.metrics import measure_average_sequence_distance
+ from gr_libs.ml.base import RLAgent
+ from gr_libs.ml.base.rl_agent import ContextualAgent
+ from gr_libs.ml.utils import get_siamese_dataset_path
+
+
+ class GRDataset(Dataset):
+     def __init__(self, num_samples, samples):
+         self.num_samples = num_samples
+         self.samples = samples
+
+     def __len__(self):
+         return self.num_samples
+
+     def __getitem__(self, idx):
+         return self.samples[idx]  # returns a tuple - as appended in the last lines of 'generate_datasets'
+
+
+ def check_diff_goals(first_agent_goal, second_agent_goal):
+     # Goals may be scalars, arrays, or sequences of arrays; try increasingly
+     # permissive comparisons until one of them confirms the goals differ.
+     try:
+         assert first_agent_goal != second_agent_goal
+     except Exception:
+         try:
+             assert any(first_agent_goal != second_agent_goal)
+         except Exception:
+             for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
+                 assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))
+
+
+ def generate_datasets(num_samples, agents: List[ContextualAgent], observation_creation_method: MethodType, problems: List[str], env_prop: EnvProperty, recognizer_name: str, gc_goal_set=None):
+     if gc_goal_set: model_name = env_prop.name
+     else: model_name = env_prop.problem_list_to_str_tuple(problems)
+     dataset_directory = get_siamese_dataset_path(domain_name=env_prop.domain_name, env_name=env_prop.name, model_name=model_name, recognizer=recognizer_name)
+     dataset_train_path, dataset_dev_path = os.path.join(dataset_directory, 'train.pkl'), os.path.join(dataset_directory, 'dev.pkl')
+     if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
+         print(f"Loading pre-existing datasets in {dataset_directory}")
+         with open(dataset_train_path, 'rb') as train_file:
+             train_samples = dill.load(train_file)
+         with open(dataset_dev_path, 'rb') as dev_file:
+             dev_samples = dill.load(dev_file)
+     else:
+         print(f"{dataset_directory} doesn't exist, generating datasets")
+         if not os.path.exists(dataset_directory):
+             os.makedirs(dataset_directory)
+         all_samples = []
+         for i in range(num_samples):
+             if gc_goal_set is not None:  # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
+                 assert env_prop.gc_adaptable(), "shouldn't specify a goal directed representation if not generating datasets with a general agent."
+                 is_same_goal = (np.random.choice([1, 0], 1, p=[1 / max(len(gc_goal_set), 6), 1 - 1 / max(len(gc_goal_set), 6)]))[0]
+                 first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
+                 first_random_index = np.random.randint(0, len(gc_goal_set))  # works for lists of every object type, while np.random.choice only works for 1d arrays
+                 first_agent_goal = gc_goal_set[first_random_index]  # could be either a real goal or a goal-directed problem name
+                 # first_agent_goal = np.random.choice(gc_goal_set)
+                 first_trace_percentage = random.choice([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
+                 first_observation = []
+                 first_agent_kwargs = {
+                     "action_selection_method": observation_creation_method,
+                     "percentage": first_trace_percentage,
+                     "is_consecutive": first_is_consecutive,
+                     "save_fig": False
+                 }
+                 while first_observation == []:
+                     # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
+                     # but the problem is with the panda because it
+                     if env_prop.use_goal_directed_problem(): first_agent_kwargs["goal_directed_problem"] = first_agent_goal
+                     else: first_agent_kwargs["goal_directed_goal"] = first_agent_goal
+                     first_observation = agents[0].agent.generate_partial_observation(**first_agent_kwargs)
+                 first_observation = agents[0].agent.simplify_observation(first_observation)
+
+                 second_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
+                 second_agent_goal = first_agent_goal
+                 second_random_index = first_random_index
+                 if not is_same_goal:
+                     second_random_index = np.random.choice([i for i in range(len(gc_goal_set)) if i != first_random_index])
+                     assert first_random_index != second_random_index
+                     second_agent_goal = gc_goal_set[second_random_index]
+                     check_diff_goals(first_agent_goal, second_agent_goal)
+                 second_trace_percentage = first_trace_percentage
+                 second_observation = []
+                 second_agent_kwargs = {
+                     "action_selection_method": observation_creation_method,
+                     "percentage": second_trace_percentage,
+                     "is_consecutive": second_is_consecutive,
+                     "save_fig": False
+                 }
+                 while second_observation == []:
+                     if env_prop.use_goal_directed_problem(): second_agent_kwargs["goal_directed_problem"] = second_agent_goal
+                     else: second_agent_kwargs["goal_directed_goal"] = second_agent_goal
+                     second_observation = agents[0].agent.generate_partial_observation(**second_agent_kwargs)
+                 second_observation = agents[0].agent.simplify_observation(second_observation)
+             else:
+                 is_same_goal = (np.random.choice([1, 0], 1, p=[1 / max(len(agents), 6), 1 - 1 / max(len(agents), 6)]))[0]
+                 first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
+                 first_agent = np.random.choice(agents)
+                 first_trace_percentage = random.choice([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
+                 first_observation = first_agent.agent.generate_partial_observation(action_selection_method=observation_creation_method, percentage=first_trace_percentage, is_consecutive=first_is_consecutive, save_fig=False, random_optimalism=True)
+                 first_observation = first_agent.agent.simplify_observation(first_observation)
+
+                 second_agent = first_agent
+                 if not is_same_goal:
+                     second_agent = np.random.choice([agent for agent in agents if agent != first_agent])
+                     assert second_agent != first_agent
+                 second_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
+                 second_trace_percentage = first_trace_percentage
+                 second_observation = second_agent.agent.generate_partial_observation(action_selection_method=observation_creation_method, percentage=second_trace_percentage, is_consecutive=second_is_consecutive, save_fig=False, random_optimalism=True)
+                 second_observation = second_agent.agent.simplify_observation(second_observation)
+                 if is_same_goal:
+                     observations_distance = measure_average_sequence_distance(first_observation, second_observation)  # for debugging
+             all_samples.append((
+                 [torch.tensor(observation, dtype=torch.float32) for observation in first_observation],
+                 [torch.tensor(observation, dtype=torch.float32) for observation in second_observation],
+                 torch.tensor(is_same_goal, dtype=torch.float32)))
+             # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
+             if i % 1000 == 0:
+                 print(f'generated {i} samples')
+
+         # 80/20 train/dev split over the generated pairs.
+         total_samples = len(all_samples)
+         train_size = int(0.8 * total_samples)
+         train_samples = all_samples[:train_size]
+         dev_samples = all_samples[train_size:]
+         with open(dataset_train_path, 'wb') as train_file:
+             dill.dump(train_samples, train_file)
+         with open(dataset_dev_path, 'wb') as dev_file:
+             dill.dump(dev_samples, dev_file)
+
+     return train_samples, dev_samples
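A standalone sketch of the pair-labeling rule used in generate_datasets above (numbers are illustrative): a pair of traces is labeled "same goal" with probability 1 / max(num_goals, 6), which keeps positive pairs from dominating even when only a few goals are available.

    import numpy as np

    num_goals = 4
    p_same = 1 / max(num_goals, 6)  # = 1/6 here, even though only 4 goals exist
    is_same_goal = np.random.choice([1, 0], p=[p_same, 1 - p_same])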