gr-libs 0.1.8__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- gr_libs/__init__.py +3 -1
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +260 -0
- gr_libs/environment/__init__.py +14 -1
- gr_libs/environment/_utils/__init__.py +0 -0
- gr_libs/environment/{utils → _utils}/utils.py +1 -1
- gr_libs/environment/environment.py +278 -23
- gr_libs/evaluation/__init__.py +1 -0
- gr_libs/evaluation/generate_experiments_results.py +100 -0
- gr_libs/metrics/__init__.py +2 -0
- gr_libs/metrics/metrics.py +166 -31
- gr_libs/ml/__init__.py +1 -6
- gr_libs/ml/base/__init__.py +3 -1
- gr_libs/ml/base/rl_agent.py +68 -3
- gr_libs/ml/neural/__init__.py +1 -3
- gr_libs/ml/neural/deep_rl_learner.py +241 -84
- gr_libs/ml/neural/utils/__init__.py +1 -2
- gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +1 -1
- gr_libs/ml/planner/mcts/mcts_model.py +71 -34
- gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs/ml/sequential/{lstm_model.py → _lstm_model.py} +11 -14
- gr_libs/ml/tabular/__init__.py +1 -3
- gr_libs/ml/tabular/tabular_q_learner.py +27 -9
- gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
- gr_libs/ml/utils/__init__.py +2 -9
- gr_libs/ml/utils/format.py +13 -90
- gr_libs/ml/utils/math.py +3 -2
- gr_libs/ml/utils/other.py +2 -2
- gr_libs/ml/utils/storage.py +41 -94
- gr_libs/odgr_executor.py +263 -0
- gr_libs/problems/consts.py +570 -292
- gr_libs/recognizer/{utils → _utils}/format.py +2 -2
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +127 -36
- gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py} +11 -11
- gr_libs/recognizer/graml/graml_recognizer.py +186 -35
- gr_libs/recognizer/recognizer.py +59 -10
- gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- {tutorials → gr_libs/tutorials}/gcdraco_panda_tutorial.py +11 -11
- {tutorials → gr_libs/tutorials}/gcdraco_parking_tutorial.py +6 -8
- {tutorials → gr_libs/tutorials}/graml_minigrid_tutorial.py +18 -14
- {tutorials → gr_libs/tutorials}/graml_panda_tutorial.py +11 -12
- {tutorials → gr_libs/tutorials}/graml_parking_tutorial.py +8 -10
- {tutorials → gr_libs/tutorials}/graml_point_maze_tutorial.py +17 -3
- {tutorials → gr_libs/tutorials}/graql_minigrid_tutorial.py +2 -2
- {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/METADATA +95 -29
- gr_libs-0.2.5.dist-info/RECORD +72 -0
- {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/WHEEL +1 -1
- gr_libs-0.2.5.dist-info/top_level.txt +2 -0
- tests/test_draco.py +14 -0
- tests/test_gcdraco.py +2 -2
- tests/test_graml.py +4 -4
- tests/test_graql.py +1 -1
- tests/test_odgr_executor_expertbasedgraml.py +14 -0
- tests/test_odgr_executor_gcdraco.py +14 -0
- tests/test_odgr_executor_gcgraml.py +14 -0
- tests/test_odgr_executor_graql.py +14 -0
- evaluation/analyze_results_cross_alg_cross_domain.py +0 -267
- evaluation/create_minigrid_map_image.py +0 -38
- evaluation/file_system.py +0 -53
- evaluation/generate_experiments_results.py +0 -141
- evaluation/generate_experiments_results_new_ver1.py +0 -238
- evaluation/generate_experiments_results_new_ver2.py +0 -331
- evaluation/generate_task_specific_statistics_plots.py +0 -500
- evaluation/get_plans_images.py +0 -62
- evaluation/increasing_and_decreasing_.py +0 -104
- gr_libs/ml/neural/utils/penv.py +0 -60
- gr_libs-0.1.8.dist-info/RECORD +0 -70
- gr_libs-0.1.8.dist-info/top_level.txt +0 -4
- /gr_libs/{environment/utils/__init__.py → _evaluation/_generate_experiments_results.py} +0 -0
- /gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +0 -0
- /gr_libs/ml/planner/mcts/{utils → _utils}/node.py +0 -0
- /gr_libs/recognizer/{utils → _utils}/__init__.py +0 -0
gr_libs/metrics/__init__.py
CHANGED
gr_libs/metrics/metrics.py
CHANGED
```diff
@@ -1,23 +1,24 @@
+""" metrics for GR algorithms, to perform distance, similarity, likelihood and other measurements and metrics. """
+
 import math
+from collections.abc import Callable, Generator
+from math import log2
+from typing import Any
+
 import dill
 import gymnasium
 import numpy as np
-
-from typing import Callable, Generator, List, Dict, Tuple, Any
-from math import log2
-from scipy.stats import wasserstein_distance
 from gymnasium.spaces.discrete import Discrete
-
-# import torch
-# from torch.distributions.categorical import Categorical
+from scipy.stats import wasserstein_distance
 
 from ..ml.base import State
 from ..ml.base.rl_agent import RLAgent
 from ..ml.neural.deep_rl_learner import DeepRLAgent
 
 
-def kl_divergence(p1: List[float], p2: List[float]) -> float:
-    """
+def kl_divergence(p1: list[float], p2: list[float]) -> float:
+    """
+    Computes Kullback–Leibler divergence from two probabilities distributions p1 and p2.
     We follow the formula in Wikipedia https://en.wikipedia.org/wiki/Kullback–Leibler_divergence
 
     Args:
@@ -32,8 +33,19 @@ def kl_divergence(p1: List[float], p2: List[float]) -> float:
 
 
 def kl_divergence_norm_softmax(
-    observations:
+    observations: list[tuple[State, Any]], agent, actions: Discrete
 ):
+    """
+    Calculates the Kullback-Leibler (KL) divergence between two probability distributions.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent: The agent object.
+        actions: The discrete actions.
+
+    Returns:
+        float: The mean KL divergence between the two distributions.
+    """
     distances = []
     p_traj = traj_to_policy(observations=observations, actions=actions)
 
@@ -42,7 +54,7 @@ def kl_divergence_norm_softmax(
         state_pickled = dill.dumps(state)
 
         qp1 = p_traj[state_pickled]
-        qp2_flatten_distribution_list:
+        qp2_flatten_distribution_list: list[float] = agent.get_actions_probabilities(
             observation=(observation, agent_pos)
         )
         distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
@@ -62,6 +74,17 @@ def amplify(values, alpha=1.0):
 
 
 def stochastic_amplified_selection(actions_probs, alpha=8.0):
+    """
+    Selects an action based on the given action probabilities, with amplification using the specified alpha value.
+
+    Parameters:
+        actions_probs (list): A list of action probabilities.
+        alpha (float): Amplification factor (default: 8.0).
+
+    Returns:
+        int: The selected action.
+
+    """
     action_probs_amplified = amplify(actions_probs, alpha)
     choice = np.random.choice(len(action_probs_amplified), p=action_probs_amplified)
     if choice == 3:
@@ -69,11 +92,32 @@ def stochastic_amplified_selection(actions_probs, alpha=8.0):
     return choice
 
 
+import numpy as np
+
+
 def stochastic_selection(actions_probs):
+    """
+    Selects an action based on the given probabilities using a stochastic selection method.
+
+    Parameters:
+        actions_probs (list): A list of probabilities for each action.
+
+    Returns:
+        int: The index of the selected action.
+    """
     return np.random.choice(len(actions_probs), p=actions_probs)
 
 
 def greedy_selection(actions_probs):
+    """
+    Selects the action with the highest probability.
+
+    Args:
+        actions_probs (numpy.ndarray): Array of action probabilities.
+
+    Returns:
+        int: Index of the selected action.
+    """
     return np.argmax(actions_probs)
 
 
@@ -104,13 +148,21 @@ def measure_average_sequence_distance(seq1, seq2):
 
 
 def traj_to_policy(
-    observations:
-) ->
-
-
+    observations: list[tuple[State, Any]], actions: Discrete, epsilon: float = 0.0
+) -> dict[str, list[float]]:
+    """
+    Converts a trajectory from a planner to a policy.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of tuples containing the observation and the corresponding action.
+        actions (Discrete): Discrete action space.
+        epsilon (float, optional): Exploration parameter. Defaults to 0.0.
+
+    Returns:
+        dict[str, list[float]]: Dictionary mapping serialized states to action probabilities.
+    """
     trajectory_as_policy = {}
-    for (observation,
-    # in the discrete world the action is the index
+    for (observation, _agent_pos), action in observations:
         action_index = action
 
         actions_len = actions.n
@@ -123,21 +175,48 @@ def traj_to_policy(
     return trajectory_as_policy
 
 
+from collections.abc import Generator
+from typing import Any
+
+
 def pass_observation_patcher(
-    observations:
+    observations: list[Any], agent: RLAgent
 ) -> Generator[None, None, None]:
-
-
+    """
+    Generator function that yields observations.
+
+    Args:
+        observations (list): List of observations.
+        agent (RLAgent): RL agent object.
+
+    Yields:
+        None: Yields each observation from the list.
+
+    """
+    yield from observations
 
 
 def mean_wasserstein_distance(
-    observations:
+    observations: list[tuple[State, Any]],
     agent: DeepRLAgent,
     actions: gymnasium.spaces.Box,
     observation_patcher: Callable[
-        [
+        [list[Any], RLAgent], Generator[None, None, None]
     ] = pass_observation_patcher,
 ):
+    """
+    Calculates the mean Wasserstein distance between observed actions and actor means.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent.
+        actions (gymnasium.spaces.Box): The action space.
+        observation_patcher (Callable[[list[Any], RLAgent], Generator[None, None, None]], optional):
+            A function that patches the observations. Defaults to pass_observation_patcher.
+
+    Returns:
+        float: The mean Wasserstein distance between observed actions and actor means.
+    """
     distances = []
 
     for observation, observed_action in observation_patcher(observations, agent):
@@ -163,10 +242,21 @@ def mean_wasserstein_distance(
 
 
 def mean_action_distance_continuous(
-    observations:
+    observations: list[tuple[State, Any]],
     agent: DeepRLAgent,
     actions: gymnasium.spaces.Box,
 ):
+    """
+    Calculates the mean distance between the predicted actions and the actual actions for a continuous action space.
+
+    Args:
+        observations (list[tuple[State, Any]]): A list of tuples containing the observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent used to predict actions.
+        actions (gymnasium.spaces.Box): The action space.
+
+    Returns:
+        float: The mean distance between the predicted actions and the actual actions.
+    """
     distances = []
     for observation, action in observations:
         action2, _ = agent.model.predict(
@@ -182,18 +272,29 @@ def mean_action_distance_continuous(
         ), f"Actions should be on the same length:{action},{action2}"
 
         total_diff = 0
-        # total_diff = []
         for action1, action2 in zip(action_arr, action2_arr):
             total_diff += math.fabs(action1 - action2)
-        # distances.append(statistics.mean(total_diff))
         distances.append(total_diff)
-    # print(f"distances:{distances}")
     return np.mean(distances)
 
 
+from collections.abc import Generator
+from typing import Any
+
+
 def set_agent_goal_observation(
-    observations:
+    observations: list[Any], agent: RLAgent
 ) -> Generator[None, None, None]:
+    """
+    Sets the desired goal in each observation to the agent's goal.
+
+    Args:
+        observations (list): List of observations.
+        agent (RLAgent): The RL agent.
+
+    Yields:
+        tuple: A tuple containing the modified observation and the corresponding action.
+    """
     copy_observation = observations.copy()
     for observation, action in copy_observation:
         observation["desired_goal"] = agent.goal
@@ -205,13 +306,29 @@ def z_score(x, mean_action: float, std_dev: float):
 
 
 def mean_p_value(
-    observations:
+    observations: list[tuple[State, Any]],
     agent: DeepRLAgent,
     actions: gymnasium.spaces.Box,
     observation_patcher: Callable[
-        [
+        [list[Any], RLAgent], Generator[None, None, None]
    ] = pass_observation_patcher,
 ):
+    """
+    Calculate the mean p-value for a given set of observations.
+
+    Args:
+        observations (list[tuple[State, Any]]): List of observations and corresponding actions.
+        agent (DeepRLAgent): The deep reinforcement learning agent.
+        actions (gymnasium.spaces.Box): The action space.
+        observation_patcher (Callable[[list[Any], RLAgent], Generator[None, None, None]], optional):
+            A function that patches the observations. Defaults to pass_observation_patcher.
+
+    Returns:
+        float: The mean p-value.
+
+    Raises:
+        Exception: If the lengths of observed actions, actor mean, and std-dev are not equal.
+    """
     distances = []
     for observation, observed_action in observation_patcher(observations, agent):
         # execute prediction X times and add to list (observed_action * X) |X| Len
@@ -250,12 +367,30 @@ def mean_p_value(
     return np.mean(distances)
 
 
-def normalize(values:
+def normalize(values: list[float]) -> list[float]:
+    """
+    Normalize a list of values by dividing each value by the sum of all values.
+
+    Args:
+        values (list[float]): The list of values to be normalized.
+
+    Returns:
+        list[float]: The normalized list of values.
+    """
     values /= sum(values)
     return values
 
 
-def
+def maximum(values: list[float]) -> list[float]:
+    """
+    Returns a list with the same length as the input list, where the maximum value is set to 1.0 and all other values are set to 0.0.
+
+    Args:
+        values (list[float]): The input list of values.
+
+    Returns:
+        list[float]: A list with the same length as the input list, where the maximum value is set to 1.0 and all other values are set to 0.0.
+    """
     if not len(values):
         return values
     vals = np.array(values)
```
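The new docstring for `kl_divergence` only says that it follows the Wikipedia formula; the function body is not part of this diff. As a rough illustration only (not the package's code), and assuming a base-2 logarithm since the module imports `log2`, the quantity described is:

```python
from math import log2


def kl_divergence_sketch(p1: list[float], p2: list[float]) -> float:
    """Illustrative sketch of D_KL(P || Q) = sum_i p1[i] * log2(p1[i] / p2[i])."""
    return sum(p * log2(p / q) for p, q in zip(p1, p2) if p > 0)


# Two action distributions over a 4-action Discrete space:
print(kl_divergence_sketch([0.7, 0.1, 0.1, 0.1], [0.25, 0.25, 0.25, 0.25]))  # ~0.64 bits
```

In `kl_divergence_norm_softmax`, the diff shows that an observed trajectory is first converted into per-state action distributions by `traj_to_policy`, and the divergence against the agent's action probabilities is then averaged over the visited states.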
gr_libs/ml/__init__.py
CHANGED
```diff
@@ -1,8 +1,3 @@
-from
+# from ml.neural import ACModel, RecurrentACModel
 
 # from ml.neural import PPOAlgo
-from ..ml.tabular import TabularQLearner
-
-# from ml.neural import ACModel, RecurrentACModel
-from ..ml.neural import DictList
-from ..ml.agent import Agent
```
gr_libs/ml/base/__init__.py
CHANGED
gr_libs/ml/base/rl_agent.py
CHANGED
```diff
@@ -1,12 +1,33 @@
-from typing import Any
 from abc import ABC, abstractmethod
-
+from typing import Any
 
 State = Any
 
 
 class ContextualAgent:
+    """
+    A class representing a contextual agent for reinforcement learning, including gym properties.
+
+    Args:
+        problem_name (str): The name of the problem the agent is designed to solve.
+        problem_goal (str): The goal of the problem the agent is designed to achieve.
+        agent: The underlying agent implementation.
+
+    Attributes:
+        problem_name (str): The name of the problem the agent is designed to solve.
+        problem_goal (str): The goal of the problem the agent is designed to achieve.
+        agent: The underlying agent implementation.
+    """
+
     def __init__(self, problem_name, problem_goal, agent):
+        """
+        Initializes a reinforcement learning agent.
+
+        Args:
+            problem_name (str): The name of the problem.
+            problem_goal (str): The goal of the problem.
+            agent: The agent object.
+        """
         self.problem_name = problem_name
         self.problem_goal = problem_goal
         self.agent = agent
@@ -23,6 +44,18 @@ class RLAgent(ABC):
         problem_name: str,
         domain_name: str,
     ):
+        """
+        Initializes a reinforcement learning agent.
+
+        Args:
+            episodes (int): The number of episodes to train the agent.
+            decaying_eps (bool): Whether to use decaying epsilon-greedy exploration.
+            epsilon (float): The exploration rate.
+            learning_rate (float): The learning rate.
+            gamma (float): The discount factor.
+            problem_name (str): The name of the problem.
+            domain_name (str): The name of the domain.
+        """
         self.episodes = episodes
         self.decaying_eps = decaying_eps
         self.epsilon = epsilon
@@ -35,18 +68,50 @@ class RLAgent(ABC):
 
     @abstractmethod
     def learn(self):
-
+        """
+        Abstract method for the agent to learn from the environment.
+        """
 
     def class_name(self):
+        """
+        Returns the name of the agent's class.
+
+        Returns:
+            str: The name of the agent's class.
+        """
         return self.__class__.__name__
 
     def get_actions_probabilities(self, observation):
+        """
+        Get the probabilities of available actions given an observation.
+
+        Args:
+            observation: The observation from the environment.
+
+        Raises:
+            Exception: This function is unimplemented.
+
+        Returns:
+            Any: The probabilities of available actions.
+        """
         raise Exception("function get_actions_probabilities is unimplemented")
 
     def get_number_of_unique_states(self):
+        """
+        Get the number of unique states encountered by the agent.
+
+        Returns:
+            int: The number of unique states encountered.
+        """
         return len(self.states_counter)
 
     def update_states_counter(self, observation_str: str):
+        """
+        Update the counter for the number of times each observation state is encountered.
+
+        Args:
+            observation_str (str): The string representation of the observation state.
+        """
         if observation_str in self.states_counter:
             self.states_counter[observation_str] = (
                 self.states_counter[observation_str] + 1
```
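The new docstrings spell out the `RLAgent` constructor arguments and its helper methods, but not their full bodies. The following is a hypothetical, self-contained sketch that mirrors the documented interface; it is not code from gr_libs, and the `states_counter` initialization is assumed since it is not visible in this diff.

```python
# Hypothetical sketch mirroring the documented RLAgent interface; not code from gr_libs.
from abc import ABC, abstractmethod


class SketchRLAgent(ABC):
    def __init__(self, episodes, decaying_eps, epsilon, learning_rate,
                 gamma, problem_name, domain_name):
        # Same constructor arguments as documented in the diff above.
        self.episodes = episodes
        self.decaying_eps = decaying_eps
        self.epsilon = epsilon
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.problem_name = problem_name
        self.domain_name = domain_name
        self.states_counter = {}  # assumed; its initialization is not shown in the diff

    @abstractmethod
    def learn(self):
        """Train the agent; concrete subclasses implement this."""

    def update_states_counter(self, observation_str: str):
        # Count how many times each serialized state has been seen.
        self.states_counter[observation_str] = self.states_counter.get(observation_str, 0) + 1

    def get_number_of_unique_states(self):
        return len(self.states_counter)


class ToyAgent(SketchRLAgent):
    def learn(self):
        # A real agent would roll out episodes in its environment; this stub
        # only exercises the state-counting helpers documented above.
        for step in range(self.episodes):
            self.update_states_counter(f"state-{step % 3}")


agent = ToyAgent(10, True, 0.1, 0.001, 0.99, "toy-problem", "toy-domain")
agent.learn()
print(agent.get_number_of_unique_states())  # 3
```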
gr_libs/ml/neural/__init__.py
CHANGED