gr-libs 0.1.8__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gr_libs/__init__.py +3 -1
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +260 -0
- gr_libs/environment/__init__.py +14 -1
- gr_libs/environment/_utils/__init__.py +0 -0
- gr_libs/environment/{utils → _utils}/utils.py +1 -1
- gr_libs/environment/environment.py +278 -23
- gr_libs/evaluation/__init__.py +1 -0
- gr_libs/evaluation/generate_experiments_results.py +100 -0
- gr_libs/metrics/__init__.py +2 -0
- gr_libs/metrics/metrics.py +166 -31
- gr_libs/ml/__init__.py +1 -6
- gr_libs/ml/base/__init__.py +3 -1
- gr_libs/ml/base/rl_agent.py +68 -3
- gr_libs/ml/neural/__init__.py +1 -3
- gr_libs/ml/neural/deep_rl_learner.py +241 -84
- gr_libs/ml/neural/utils/__init__.py +1 -2
- gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +1 -1
- gr_libs/ml/planner/mcts/mcts_model.py +71 -34
- gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs/ml/sequential/{lstm_model.py → _lstm_model.py} +11 -14
- gr_libs/ml/tabular/__init__.py +1 -3
- gr_libs/ml/tabular/tabular_q_learner.py +27 -9
- gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
- gr_libs/ml/utils/__init__.py +2 -9
- gr_libs/ml/utils/format.py +13 -90
- gr_libs/ml/utils/math.py +3 -2
- gr_libs/ml/utils/other.py +2 -2
- gr_libs/ml/utils/storage.py +41 -94
- gr_libs/odgr_executor.py +263 -0
- gr_libs/problems/consts.py +570 -292
- gr_libs/recognizer/{utils → _utils}/format.py +2 -2
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +127 -36
- gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py} +11 -11
- gr_libs/recognizer/graml/graml_recognizer.py +186 -35
- gr_libs/recognizer/recognizer.py +59 -10
- gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- {tutorials → gr_libs/tutorials}/gcdraco_panda_tutorial.py +11 -11
- {tutorials → gr_libs/tutorials}/gcdraco_parking_tutorial.py +6 -8
- {tutorials → gr_libs/tutorials}/graml_minigrid_tutorial.py +18 -14
- {tutorials → gr_libs/tutorials}/graml_panda_tutorial.py +11 -12
- {tutorials → gr_libs/tutorials}/graml_parking_tutorial.py +8 -10
- {tutorials → gr_libs/tutorials}/graml_point_maze_tutorial.py +17 -3
- {tutorials → gr_libs/tutorials}/graql_minigrid_tutorial.py +2 -2
- {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/METADATA +95 -29
- gr_libs-0.2.5.dist-info/RECORD +72 -0
- {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/WHEEL +1 -1
- gr_libs-0.2.5.dist-info/top_level.txt +2 -0
- tests/test_draco.py +14 -0
- tests/test_gcdraco.py +2 -2
- tests/test_graml.py +4 -4
- tests/test_graql.py +1 -1
- tests/test_odgr_executor_expertbasedgraml.py +14 -0
- tests/test_odgr_executor_gcdraco.py +14 -0
- tests/test_odgr_executor_gcgraml.py +14 -0
- tests/test_odgr_executor_graql.py +14 -0
- evaluation/analyze_results_cross_alg_cross_domain.py +0 -267
- evaluation/create_minigrid_map_image.py +0 -38
- evaluation/file_system.py +0 -53
- evaluation/generate_experiments_results.py +0 -141
- evaluation/generate_experiments_results_new_ver1.py +0 -238
- evaluation/generate_experiments_results_new_ver2.py +0 -331
- evaluation/generate_task_specific_statistics_plots.py +0 -500
- evaluation/get_plans_images.py +0 -62
- evaluation/increasing_and_decreasing_.py +0 -104
- gr_libs/ml/neural/utils/penv.py +0 -60
- gr_libs-0.1.8.dist-info/RECORD +0 -70
- gr_libs-0.1.8.dist-info/top_level.txt +0 -4
- /gr_libs/{environment/utils/__init__.py → _evaluation/_generate_experiments_results.py} +0 -0
- /gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +0 -0
- /gr_libs/ml/planner/mcts/{utils → _utils}/node.py +0 -0
- /gr_libs/recognizer/{utils → _utils}/__init__.py +0 -0
gr_libs/recognizer/{utils → _utils}/format.py
@@ -1,9 +1,9 @@
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
 from gr_libs.recognizer.graml.graml_recognizer import (
-    GCGraml,
     ExpertBasedGraml,
+    GCGraml,
     MCTSBasedGraml,
 )
-from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco


 def recognizer_str_to_obj(recognizer_str: str):
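For orientation, here is a minimal sketch of what a string-to-class resolver like recognizer_str_to_obj typically does, built only from the classes imported above; the function body itself is outside this hunk, so treat the mapping below as an assumption rather than the shipped implementation.

from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
from gr_libs.recognizer.graml.graml_recognizer import (
    ExpertBasedGraml,
    GCGraml,
    MCTSBasedGraml,
)

# Hypothetical name-to-class table, for illustration only.
_RECOGNIZERS = {
    cls.__name__: cls
    for cls in (Graql, Draco, GCDraco, ExpertBasedGraml, MCTSBasedGraml, GCGraml)
}


def recognizer_str_to_obj(recognizer_str: str):
    """Resolve a recognizer class from its name, e.g. "GCDraco" -> GCDraco."""
    return _RECOGNIZERS[recognizer_str]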
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -1,14 +1,9 @@
-from abc import abstractmethod
 import os
+
 import dill
-from typing import List, Type, Callable
 import numpy as np
-
-from gr_libs.
-from gr_libs.metrics.metrics import (
-    kl_divergence_norm_softmax,
-    mean_wasserstein_distance,
-)
+
+from gr_libs.metrics.metrics import kl_divergence_norm_softmax
 from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
@@ -22,11 +17,40 @@ from gr_libs.recognizer.recognizer import (


 class GRAsRL(Recognizer):
+    """
+    GRAsRL class represents a goal recognition framework that uses reinforcement learning.
+    It inherits from the Recognizer class and implements the goal recognition process, including the
+    goal adaptation and the inference phase. It trains agents for each new goal, which makes it impractical
+    for real-time environments where goals might change.
+
+    Attributes:
+        agents (dict): A dictionary that maps problem names to RLAgent instances.
+        active_goals (List[str]): A list of active goals.
+        active_problems (List[str]): A list of active problem names.
+        action_space (gym.Space): The action space of the RLAgent.
+
+    Methods:
+        goals_adaptation_phase: Performs the goals adaptation phase.
+        prepare_inf_sequence: Prepares the inference sequence for goal-directed problems.
+        inference_phase: Performs the inference phase and returns the recognized goal.
+        choose_agent: Returns the RLAgent for a given problem name.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.agents = {}  # consider changing to ContextualAgent

-    def goals_adaptation_phase(self, dynamic_goals:
+    def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
+        """
+        Performs the goals adaptation phase.
+
+        Args:
+            dynamic_goals (List[str]): A list of dynamic goals.
+            dynamic_train_configs: The dynamic training configurations.
+
+        Returns:
+            None
+        """
         super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
         dynamic_goals_problems = [
             self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
@@ -48,18 +72,46 @@ class GRAsRL(Recognizer):
             self.agents[problem_name] = agent
         self.action_space = next(iter(self.agents.values())).env.action_space

+    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+        """
+        Prepares the inference sequence for goal-directed problems.
+
+        Args:
+            problem_name (str): The name of the problem.
+            inf_sequence: The inference sequence.
+
+        Returns:
+            The prepared inference sequence.
+        """
+        if not self.env_prop.use_goal_directed_problem():
+            for obs in inf_sequence:
+                obs[0]["desired_goal"] = np.array(
+                    [self.env_prop.str_to_goal(problem_name)],
+                    dtype=obs[0]["desired_goal"].dtype,
+                )
+            return inf_sequence
+        return inf_sequence
+
     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        """
+        Performs the inference phase and returns the recognized goal.
+
+        Args:
+            inf_sequence: The inference sequence.
+            true_goal: The true goal.
+            percentage: The percentage.
+
+        Returns:
+            The recognized goal as a string.
+        """
         scores = []
         for problem_name in self.active_problems:
             agent = self.choose_agent(problem_name)
             if self.env_prop.gc_adaptable():
-                assert (
-                    self.__class__.__name__ == "GCDraco"
-                ), "This recognizer is not compatible with goal conditioned problems."
                 inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
             score = self.evaluation_function(inf_sequence, agent, self.action_space)
             scores.append(score)
-
+
         if self.collect_statistics:
             results_path = get_gr_as_rl_experiment_confidence_path(
                 domain_name=self.env_prop.domain_name,
@@ -82,10 +134,24 @@ class GRAsRL(Recognizer):
         return str(self.active_goals[true_goal_index])

     def choose_agent(self, problem_name: str) -> RLAgent:
+        """
+        Returns the RLAgent for a given problem name.
+
+        Args:
+            problem_name (str): The name of the problem.
+
+        Returns:
+            The RLAgent instance.
+        """
         return self.agents[problem_name]


 class Graql(GRAsRL, GaAgentTrainerRecognizer):
+    """
+    Graql extends the GRAsRL framework and GaAgentTrainerRecognizer, since it trains new agents for every new goal and it adheres
+    to the goal recognition as reinforcement learning framework. It uses a tabular Q-learning agent for discrete state and action spaces.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
@@ -93,12 +159,31 @@ class Graql(GRAsRL, GaAgentTrainerRecognizer):
             and self.env_prop.is_state_discrete()
             and self.env_prop.is_action_discrete()
         )
-        if self.rl_agent_type
+        if self.rl_agent_type is None:
             self.rl_agent_type = TabularQLearner
         self.evaluation_function = kl_divergence_norm_softmax


 class Draco(GRAsRL, GaAgentTrainerRecognizer):
+    """
+    Draco class represents a recognizer agent trained using the GRAsRL framework.
+    Like Graql, it trains new agents for every new goal and adheres to the goal recognition as reinforcement learning framework.
+    It uses a deep reinforcement learning agent for continuous state and action spaces.
+
+    Args:
+        *args: Variable length argument list.
+        **kwargs: Arbitrary keyword arguments.
+
+    Attributes:
+        rl_agent_type (type): Type of the reinforcement learning agent.
+        evaluation_function (callable): Function used for evaluation.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Add any additional initialization code here
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
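Read together, the GRAsRL, Graql, and Draco docstrings above describe a two-call runtime contract: goals_adaptation_phase(dynamic_goals, dynamic_train_configs) trains one agent per goal, then inference_phase(inf_sequence, true_goal, percentage) scores a partial observation sequence against every active goal. The following hedged usage sketch shows only that call order; the constructor keyword arguments, goal strings, train configs, and observation sequence are placeholders, not values taken from this diff.

from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql

# Placeholder construction; the actual constructor kwargs come from the Recognizer base class.
recognizer = Graql(domain_name="minigrid", env_name="MiniGrid-SimpleCrossingS13N4-v0")

# One agent is trained (or loaded) per dynamic goal.
recognizer.goals_adaptation_phase(
    dynamic_goals=["(11, 1)", "(1, 11)"],        # placeholder goal strings
    dynamic_train_configs=[(None, None)] * 2,    # placeholder per-goal train configs
)

# A recorded partial observation sequence would normally come from the environment.
inf_sequence = []  # placeholder
recognized_goal = recognizer.inference_phase(inf_sequence, true_goal="(11, 1)", percentage=0.5)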
@@ -108,15 +193,25 @@ class Draco(GRAsRL, GaAgentTrainerRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = DeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
-
-
-            or type(self.evaluation_function) != Callable
-        )
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance

+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
+
+
+class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
+    """
+    GCDraco recognizer uses goal-conditioned reinforcement learning using the Draco algorithm.
+    It inherits from GRAsRL, LearningRecognizer, and GaAdaptingRecognizer.
+    It is designed for environments with continuous state and action spaces.
+    It uses a goal-conditioned deep reinforcement learning agent for training and inference, which
+    enables it to adapt to new goals during the goal adaptation phase without requiring retraining,
+    making it suitable for dynamic environments.
+    """

-class GCDraco(
-    GRAsRL, LearningRecognizer, GaAdaptingRecognizer
-):  # TODO problem: it gets 2 goal_adaptation phase from parents, one with configs and one without.
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
@@ -127,12 +222,18 @@ class GCDraco(
         if self.rl_agent_type == None:
             self.rl_agent_type = GCDeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
-
-
-
-
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."

-    def domain_learning_phase(self,
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
         super().domain_learning_phase(base_goals, train_configs)
         agent_kwargs = {
             "domain_name": self.env_prop.domain_name,
@@ -155,13 +256,3 @@ class GCDraco(

     def choose_agent(self, problem_name: str) -> RLAgent:
         return next(iter(self.agents.values()))
-
-    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
-        if not self.env_prop.use_goal_directed_problem():
-            for obs in inf_sequence:
-                obs[0]["desired_goal"] = np.array(
-                    [self.env_prop.str_to_goal(problem_name)],
-                    dtype=obs[0]["desired_goal"].dtype,
-                )
-            return inf_sequence
-        return inf_sequence
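Two API changes are visible in the hunks above: Draco and GCDraco now fall back to mean_wasserstein_distance when no evaluation_function is passed, and GCDraco.domain_learning_phase now takes a single problems mapping and reads its "gc" entry. Below is a hedged sketch of the new call shape; the constructor kwargs, goal identifiers, and train config values are placeholders.

from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import GCDraco

# evaluation_function may be omitted entirely; the new code then uses mean_wasserstein_distance.
recognizer = GCDraco(domain_name="parking", env_name="parking-v0")  # placeholder kwargs

# domain_learning_phase now receives one dict keyed by problem kind ("gc" for GCDraco).
recognizer.domain_learning_phase(
    {
        "gc": {
            "goals": ["1", "4", "8"],            # placeholder goal identifiers
            "train_configs": [(None, 200_000)],  # placeholder algorithm/steps pair
        }
    }
)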
gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py}
@@ -1,16 +1,16 @@
-import
-from torch.utils.data import Dataset
+import os
 import random
 from types import MethodType
-
+
+import dill
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
 from gr_libs.environment.environment import EnvProperty
 from gr_libs.metrics.metrics import measure_average_sequence_distance
 from gr_libs.ml.base.rl_agent import ContextualAgent
 from gr_libs.ml.utils import get_siamese_dataset_path
-from gr_libs.ml.base import RLAgent
-import os
-import dill
-import torch


 class GRDataset(Dataset):
@@ -30,19 +30,19 @@ class GRDataset(Dataset):
 def check_diff_goals(first_agent_goal, second_agent_goal):
     try:
         assert first_agent_goal != second_agent_goal
-    except Exception
+    except Exception:
         try:
             assert any(first_agent_goal != second_agent_goal)
-        except Exception
+        except Exception:
             for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
                 assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))


 def generate_datasets(
     num_samples,
-    agents:
+    agents: list[ContextualAgent],
     observation_creation_method: MethodType,
-    problems:
+    problems: list[str],
     env_prop: EnvProperty,
     recognizer_name: str,
     gc_goal_set=None,
gr_libs/recognizer/graml/graml_recognizer.py
@@ -1,39 +1,49 @@
-
-
+""" Collection of recognizers that use GRAML methods: metric learning for ODGR. """
+
 import os
-from
-
-
-from typing import List, Tuple
+from abc import abstractmethod
+
+import dill
 import numpy as np
-from torch.utils.data import DataLoader
-from torch.nn.utils.rnn import pad_sequence
 import torch
+from torch.nn.utils.rnn import pad_sequence
+from torch.utils.data import DataLoader
+
+from gr_libs.environment.environment import EnvProperty
+from gr_libs.metrics import metrics
+from gr_libs.ml import utils
+from gr_libs.ml.base import ContextualAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.planner.mcts import mcts_model
-import
+from gr_libs.ml.sequential._lstm_model import LstmObservations, train_metric_model
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
-from gr_libs.recognizer.graml.gr_dataset import GRDataset, generate_datasets
-from gr_libs.ml.sequential.lstm_model import LstmObservations, train_metric_model
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.ml.utils.storage import (
     get_and_create,
-    get_lstm_model_dir,
     get_embeddings_result_path,
+    get_lstm_model_dir,
     get_policy_sequences_result_path,
 )
-from gr_libs.
+from gr_libs.recognizer.graml._gr_dataset import GRDataset, generate_datasets
 from gr_libs.recognizer.recognizer import (
     GaAdaptingRecognizer,
     GaAgentTrainerRecognizer,
     LearningRecognizer,
-
-)  # import first, very dependent
+)

 ### TODO IMPLEMENT MORE SELECTION METHODS, MAKE SURE action_probs IS AS IT SEEMS: list of action-probability 'es ###


 def collate_fn(batch):
+    """
+    Collates a batch of data for training or evaluation.
+
+    Args:
+        batch (list): A list of tuples, where each tuple contains the first traces, second traces, and the label indicating whether the goals are the same.
+
+    Returns:
+        tuple: A tuple containing the padded first traces, padded second traces, labels, lengths of first traces, and lengths of second traces.
+    """
     first_traces, second_traces, is_same_goals = zip(*batch)
     # torch.stack takes tensor tuples (fixed size) and stacks them up in a matrix
     first_traces_padded = pad_sequence(
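The new collate_fn docstring says the function pads variable-length trace pairs and returns them with their original lengths and the same-goal labels. Below is a self-contained sketch of that padding pattern using torch.nn.utils.rnn.pad_sequence; it illustrates the idea only and is not the package's exact implementation.

import torch
from torch.nn.utils.rnn import pad_sequence


def collate_trace_pairs(batch):
    """Pad (first_trace, second_trace, is_same_goal) tuples to a common length."""
    first_traces, second_traces, is_same_goals = zip(*batch)
    lengths_first = torch.tensor([len(t) for t in first_traces])
    lengths_second = torch.tensor([len(t) for t in second_traces])
    # batch_first=True -> tensors of shape (batch, max_len, feature_dim)
    first_padded = pad_sequence(first_traces, batch_first=True)
    second_padded = pad_sequence(second_traces, batch_first=True)
    labels = torch.tensor(is_same_goals, dtype=torch.float32)
    return first_padded, second_padded, labels, lengths_first, lengths_second


# Example: two pairs of traces with different lengths and 3-dimensional observations.
batch = [
    (torch.randn(5, 3), torch.randn(7, 3), 1.0),
    (torch.randn(2, 3), torch.randn(4, 3), 0.0),
]
first_padded, second_padded, labels, len_first, len_second = collate_trace_pairs(batch)
print(first_padded.shape, second_padded.shape, labels, len_first, len_second)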
@@ -68,17 +78,52 @@ def save_weights(model: LstmObservations, path):


 class Graml(LearningRecognizer):
+    """
+    The Graml class is a subclass of LearningRecognizer and represents a recognizer that uses the Graml algorithm for goal recognition.
+    Graml learns a metric over observation sequences, over time: using a GC or a collection of agents, it creates a dataset and learns
+    the metric on it during the domain learning phase. During the goals adaptation phase, it creates or receives a library of sequences for each goal,
+    and maintains embeddings of them for the inference phase. The inference phase uses the learned metric to find the closest goal to a given sequence.
+
+    Attributes:
+        agents (list[ContextualAgent]): A list of contextual agents associated with the recognizer.
+        train_func: The function used for training the metric model.
+        collate_func: The function used for collating data in the training process.
+
+    Methods:
+        train_agents_on_base_goals(base_goals: list[str], train_configs: list): Trains the agents on the given base goals and train configurations.
+        domain_learning_phase(base_goals: list[str], train_configs: list): Performs the domain learning phase of the Graml algorithm.
+        goals_adaptation_phase(dynamic_goals: list[EnvProperty], save_fig=False): Performs the goals adaptation phase of the Graml algorithm.
+        get_goal_plan(goal): Retrieves the plan associated with the given goal.
+        dump_plans(true_sequence, true_goal, percentage): Dumps the plans to a file.
+        create_embeddings_dict(): Creates the embeddings dictionary for the plans.
+        inference_phase(inf_sequence, true_goal, percentage) -> str: Performs the inference phase of the Graml algorithm and returns the closest goal.
+        generate_sequences_library(goal: str, save_fig=False) -> list[list[tuple[np.ndarray, np.ndarray]]]: Generates the sequences library for the given goal.
+        update_sequences_library_inference_phase(inf_sequence) -> list[list[tuple[np.ndarray, np.ndarray]]]: Updates the sequences library during the inference phase.
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GramlRecognizer object.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        Attributes:
+            agents (list[ContextualAgent]): List of contextual agents.
+            train_func: Training function for the metric model.
+            collate_func: Collate function for data batching.
+        """
         super().__init__(*args, **kwargs)
-        self.agents:
+        self.agents: list[ContextualAgent] = []
         self.train_func = train_metric_model
         self.collate_func = collate_fn

     @abstractmethod
-    def train_agents_on_base_goals(self, base_goals:
+    def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
         pass

-    def domain_learning_phase(self, base_goals:
+    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
         super().domain_learning_phase(base_goals, train_configs)
         self.train_agents_on_base_goals(base_goals, train_configs)
         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
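The Graml docstring above lays out a three-phase flow: domain learning fits the LSTM metric model over traces of the base agents, goals adaptation builds and embeds a sequence library per dynamic goal, and inference embeds the observed sequence and returns the closest goal. Below is a hedged sketch of that call order using the base-class signatures listed in the Methods section; the recognizer instance and every argument value are placeholders, and note that the BGGraml and GCGraml hunks further down override domain_learning_phase to take a problems dict instead.

# `recognizer` stands for an already-constructed Graml subclass; all values are placeholders.
base_goals = ["g1", "g2", "g3"]        # placeholder base goals
train_configs = [(None, None)] * 3     # placeholder per-goal train configs
dynamic_goals = []                     # placeholder list of dynamic goals
inf_sequence = []                      # placeholder recorded partial observation sequence

# 1) Domain learning: train/collect base agents, build the pair dataset, fit the metric model.
recognizer.domain_learning_phase(base_goals, train_configs)

# 2) Goals adaptation: build a sequence library for each dynamic goal and embed it.
recognizer.goals_adaptation_phase(dynamic_goals=dynamic_goals, save_fig=False)

# 3) Inference: embed the observed partial sequence and return the closest goal.
closest_goal = recognizer.inference_phase(inf_sequence, true_goal="g1", percentage=0.5)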
@@ -130,7 +175,7 @@ class Graml(LearningRecognizer):
         )
         save_weights(model=self.model, path=self.model_file_path)

-    def goals_adaptation_phase(self, dynamic_goals:
+    def goals_adaptation_phase(self, dynamic_goals: list[EnvProperty], save_fig=False):
         self.is_first_inf_since_new_goals = True
         self.current_goals = dynamic_goals
         # start by training each rl agent on the base goal set
@@ -245,13 +290,13 @@ class Graml(LearningRecognizer):
     @abstractmethod
     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
         pass

     # this function duplicates every sequence and creates a consecutive and non-consecutive version of it
     def update_sequences_library_inference_phase(
         self, inf_sequence
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
         new_plans_dict = {}
         for goal, obss in self.plans_dict.items():
             new_obss = []
@@ -281,17 +326,27 @@ class Graml(LearningRecognizer):


 class BGGraml(Graml):
+    """
+    BGGraml class represents a goal-directed agent for the BGGraml algorithm.
+
+    It extends the Graml class and provides additional methods for training agents on base goals.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def domain_learning_phase(self,
-
-
-
-
+    def domain_learning_phase(self, problems):
+        # Always use 'bg' for BGGraml
+        base = problems["bg"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
+        assert len(base_goals) == len(
+            train_configs
+        ), "base_goals and train_configs should have the same length"
+        super().domain_learning_phase(base_goals, train_configs)

     # In case we need goal-directed agent for every goal
-    def train_agents_on_base_goals(self, base_goals:
+    def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
         self.original_problems = [
             self.env_prop.goal_to_problem_str(g) for g in base_goals
         ]
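BGGraml.domain_learning_phase reads the "bg" entry of the same problems mapping and asserts one train config per base goal. A minimal sketch of the expected shape follows; the goals and config tuples are placeholders.

# Only the dict layout and the one-config-per-goal requirement come from the hunk above;
# the concrete values are placeholders.
problems = {
    "bg": {
        "goals": ["g1", "g2", "g3"],
        "train_configs": [(None, None), (None, None), (None, None)],  # same length as goals
    }
}

# `recognizer` is an already-constructed BGGraml subclass (ExpertBasedGraml or MCTSBasedGraml).
recognizer.domain_learning_phase(problems)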
@@ -316,14 +371,40 @@ class BGGraml(Graml):


 class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
+    """
+    MCTSBasedGraml is a class that represents a recognizer based on the MCTS algorithm.
+    It inherits from BGGraml and GaAdaptingRecognizer classes.
+
+    Attributes:
+        rl_agent_type (type): The type of reinforcement learning agent used.
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GramlRecognizer object.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        """
         super().__init__(*args, **kwargs)
         if self.rl_agent_type == None:
             self.rl_agent_type = TabularQLearner

     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
+        """
+        Generates a library of sequences for a given goal.
+
+        Args:
+            goal (str): The goal for which to generate sequences.
+            save_fig (bool, optional): Whether to save the generated figure. Defaults to False.
+
+        Returns:
+            list[list[tuple[np.ndarray, np.ndarray]]]: The generated sequences library.
+        """
         problem_name = self.env_prop.goal_to_problem_str(goal)
         img_path = os.path.join(
             get_policy_sequences_result_path(
@@ -342,7 +423,29 @@ class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):


 class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
+    """
+    ExpertBasedGraml class represents a Graml recognizer that uses expert knowledge to generate sequences library and adapt goals.
+
+    Args:
+        *args: Variable length argument list.
+        **kwargs: Arbitrary keyword arguments.
+
+    Attributes:
+        rl_agent_type (type): The type of reinforcement learning agent used.
+        env_prop (EnvironmentProperties): The environment properties.
+        dynamic_train_configs_dict (dict): The dynamic training configurations for each problem.
+
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GRAML Recognizer.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        """
         super().__init__(*args, **kwargs)
         if self.rl_agent_type == None:
             if self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete():
@@ -352,7 +455,18 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):

     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
+        """
+        Generates a sequences library for a given goal.
+
+        Args:
+            goal (str): The goal for which to generate the sequences library.
+            save_fig (bool, optional): Whether to save the figure. Defaults to False.
+
+        Returns:
+            list[list[tuple[np.ndarray, np.ndarray]]]: The generated sequences library.
+
+        """
         problem_name = self.env_prop.goal_to_problem_str(goal)
         kwargs = {
             "domain_name": self.domain_name,
@@ -377,7 +491,18 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
             agent_kwargs["fig_path"] = fig_path
         return [agent.generate_observation(**agent_kwargs)]

-    def goals_adaptation_phase(self, dynamic_goals:
+    def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
+        """
+        Performs the goals adaptation phase.
+
+        Args:
+            dynamic_goals (list[str]): The dynamic goals.
+            dynamic_train_configs: The dynamic training configurations.
+
+        Returns:
+            The result of the goals adaptation phase.
+
+        """
         self.dynamic_goals_problems = [
             self.env_prop.goal_to_problem_str(g) for g in dynamic_goals
         ]
@@ -391,6 +516,28 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):


 class GCGraml(Graml, GaAdaptingRecognizer):
+    """
+    GCGraml class represents a recognizer that uses the GCDeepRLAgent for domain learning and sequence generation.
+    It makes its adaptation phase quicker and requires fewer assumptions, but the assumption of a GC agent is still needed and may result
+    in less optimal policies that generate the observations in the synthetic dataset, which could eventually lead to a less optimal metric.
+
+    Args:
+        Graml (class): Base class for Graml recognizers.
+        GaAdaptingRecognizer (class): Base class for GA adapting recognizers.
+
+    Attributes:
+        rl_agent_type (class): The type of RL agent to be used for learning and generation.
+        env_prop (object): The environment properties.
+        agents (list): List of contextual agents.
+
+    Methods:
+        __init__: Initializes the GCGraml recognizer.
+        domain_learning_phase: Performs the domain learning phase.
+        train_agents_on_base_goals: Trains the RL agents on the base goals.
+        generate_sequences_library: Generates sequences library for a specific goal.
+
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if self.rl_agent_type == None:
@@ -401,14 +548,18 @@ class GCGraml(Graml, GaAdaptingRecognizer):
             and not self.env_prop.is_action_discrete()
         )

-    def domain_learning_phase(self,
+    def domain_learning_phase(self, problems):
+        # Always use 'gc' for GCGraml
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
         assert (
             len(train_configs) == 1
-        ), "
-
+        ), "GCGraml should only have one train config for the base goals, it uses a single agent"
+        super().domain_learning_phase(base_goals, train_configs)

     # In case we need goal-directed agent for every goal
-    def train_agents_on_base_goals(self, base_goals:
+    def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
         self.gc_goal_set = base_goals
         self.original_problems = self.env_prop.name  # needed for gr_dataset
         # start by training each rl agent on the base goal set
@@ -432,7 +583,7 @@ class GCGraml(Graml, GaAdaptingRecognizer):

     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
         problem_name = self.env_prop.goal_to_problem_str(goal)
         kwargs = {
             "domain_name": self.domain_name,