gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
  2. evaluation/create_minigrid_map_image.py +10 -6
  3. evaluation/file_system.py +16 -5
  4. evaluation/generate_experiments_results.py +123 -74
  5. evaluation/generate_experiments_results_new_ver1.py +227 -243
  6. evaluation/generate_experiments_results_new_ver2.py +317 -317
  7. evaluation/generate_task_specific_statistics_plots.py +481 -253
  8. evaluation/get_plans_images.py +41 -26
  9. evaluation/increasing_and_decreasing_.py +97 -56
  10. gr_libs/__init__.py +2 -1
  11. gr_libs/_version.py +2 -2
  12. gr_libs/environment/__init__.py +16 -8
  13. gr_libs/environment/environment.py +167 -39
  14. gr_libs/environment/utils/utils.py +22 -12
  15. gr_libs/metrics/__init__.py +5 -0
  16. gr_libs/metrics/metrics.py +76 -34
  17. gr_libs/ml/__init__.py +2 -0
  18. gr_libs/ml/agent.py +21 -6
  19. gr_libs/ml/base/__init__.py +1 -1
  20. gr_libs/ml/base/rl_agent.py +13 -10
  21. gr_libs/ml/consts.py +1 -1
  22. gr_libs/ml/neural/deep_rl_learner.py +433 -352
  23. gr_libs/ml/neural/utils/__init__.py +1 -1
  24. gr_libs/ml/neural/utils/dictlist.py +3 -3
  25. gr_libs/ml/neural/utils/penv.py +5 -2
  26. gr_libs/ml/planner/mcts/mcts_model.py +524 -302
  27. gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
  28. gr_libs/ml/planner/mcts/utils/node.py +11 -7
  29. gr_libs/ml/planner/mcts/utils/tree.py +14 -10
  30. gr_libs/ml/sequential/__init__.py +1 -1
  31. gr_libs/ml/sequential/lstm_model.py +256 -175
  32. gr_libs/ml/tabular/state.py +7 -7
  33. gr_libs/ml/tabular/tabular_q_learner.py +123 -73
  34. gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
  35. gr_libs/ml/utils/__init__.py +8 -2
  36. gr_libs/ml/utils/format.py +78 -70
  37. gr_libs/ml/utils/math.py +2 -1
  38. gr_libs/ml/utils/other.py +1 -1
  39. gr_libs/ml/utils/storage.py +88 -28
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
  42. gr_libs/recognizer/graml/gr_dataset.py +209 -110
  43. gr_libs/recognizer/graml/graml_recognizer.py +431 -240
  44. gr_libs/recognizer/recognizer.py +38 -27
  45. gr_libs/recognizer/utils/__init__.py +1 -1
  46. gr_libs/recognizer/utils/format.py +8 -3
  47. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
  48. gr_libs-0.1.8.dist-info/RECORD +70 -0
  49. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
  50. tests/test_gcdraco.py +10 -0
  51. tests/test_graml.py +8 -4
  52. tests/test_graql.py +2 -1
  53. tutorials/gcdraco_panda_tutorial.py +66 -0
  54. tutorials/gcdraco_parking_tutorial.py +61 -0
  55. tutorials/graml_minigrid_tutorial.py +42 -12
  56. tutorials/graml_panda_tutorial.py +35 -14
  57. tutorials/graml_parking_tutorial.py +37 -20
  58. tutorials/graml_point_maze_tutorial.py +33 -13
  59. tutorials/graql_minigrid_tutorial.py +31 -15
  60. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  61. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/environment/utils/utils.py CHANGED
@@ -1,17 +1,27 @@
  import logging
  import sys
- from gr_libs.environment.environment import MINIGRID, PANDA, PARKING, POINT_MAZE, EnvProperty, MinigridProperty, PandaProperty, ParkingProperty, PointMazeProperty
+ from gr_libs.environment.environment import (
+     MINIGRID,
+     PANDA,
+     PARKING,
+     POINT_MAZE,
+     EnvProperty,
+     MinigridProperty,
+     PandaProperty,
+     ParkingProperty,
+     PointMazeProperty,
+ )


  def domain_to_env_property(domain_name: str):
-     if domain_name == MINIGRID:
-         return MinigridProperty
-     elif domain_name == PARKING:
-         return ParkingProperty
-     elif domain_name == PANDA:
-         return PandaProperty
-     elif domain_name == POINT_MAZE:
-         return PointMazeProperty
-     else:
-         logging.error(f"Domain {domain_name} is not supported.")
-         sys.exit(1)
+     if domain_name == MINIGRID:
+         return MinigridProperty
+     elif domain_name == PARKING:
+         return ParkingProperty
+     elif domain_name == PANDA:
+         return PandaProperty
+     elif domain_name == POINT_MAZE:
+         return PointMazeProperty
+     else:
+         logging.error(f"Domain {domain_name} is not supported.")
+         sys.exit(1)
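
A quick usage sketch of the reformatted helper (assuming, per the file list and the +22/-12 counts, that this hunk belongs to gr_libs/environment/utils/utils.py; the import path below reflects that assumption):

    from gr_libs.environment.environment import MINIGRID, MinigridProperty
    from gr_libs.environment.utils.utils import domain_to_env_property

    # Resolve the property class for a supported domain constant.
    property_cls = domain_to_env_property(MINIGRID)
    assert property_cls is MinigridProperty
    # Unsupported domain names are logged as errors and trigger sys.exit(1).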
gr_libs/metrics/__init__.py CHANGED
@@ -0,0 +1,5 @@
+ from .metrics import (
+     mean_p_value,
+     mean_wasserstein_distance,
+     stochastic_amplified_selection,
+ )
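
With the new package-level __init__, the helpers can be imported from gr_libs.metrics directly; a minimal sketch of the re-export relationship:

    from gr_libs.metrics import (
        mean_p_value,
        mean_wasserstein_distance,
        stochastic_amplified_selection,
    )
    from gr_libs.metrics.metrics import stochastic_amplified_selection as _impl

    # The package-level name is the same object defined in gr_libs.metrics.metrics.
    assert _impl is stochastic_amplified_selection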
gr_libs/metrics/metrics.py CHANGED
@@ -7,6 +7,7 @@ from typing import Callable, Generator, List, Dict, Tuple, Any
  from math import log2
  from scipy.stats import wasserstein_distance
  from gymnasium.spaces.discrete import Discrete
+
  # import torch
  # from torch.distributions.categorical import Categorical

@@ -26,21 +27,24 @@ def kl_divergence(p1: List[float], p2: List[float]) -> float:
      Returns:
          float: The KL-divergence between p1 and p2
      """
-     assert (len(p1) == len(p2))
+     assert len(p1) == len(p2)
      return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))


- def kl_divergence_norm_softmax(observations: List[Tuple[State, Any]], agent, actions: Discrete):
+ def kl_divergence_norm_softmax(
+     observations: List[Tuple[State, Any]], agent, actions: Discrete
+ ):
      distances = []
      p_traj = traj_to_policy(observations=observations, actions=actions)

      for (observation, agent_pos), action in observations:
-         state = observation['image']
+         state = observation["image"]
          state_pickled = dill.dumps(state)

          qp1 = p_traj[state_pickled]
          qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
-             observation=(observation, agent_pos))
+             observation=(observation, agent_pos)
+         )
          distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
      return np.mean(distances)

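
kl_divergence itself is a pure function of two equal-length distributions, so it can be checked in isolation; the probabilities below are illustrative:

    from math import log2

    def kl_divergence(p1, p2):
        # same logic as the function in the hunk above
        assert len(p1) == len(p2)
        return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))

    print(kl_divergence([0.5, 0.5], [0.9, 0.1]))  # ~0.74 bits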
@@ -53,9 +57,10 @@ def amplify(values, alpha=1.0):
      Returns:
          np.array: amplified softmax probabilities
      """
-     values = values[:3]**alpha  # currently only choose to turn or move forward
+     values = values[:3] ** alpha  # currently only choose to turn or move forward
      return values / np.sum(values)

+
  def stochastic_amplified_selection(actions_probs, alpha=8.0):
      action_probs_amplified = amplify(actions_probs, alpha)
      choice = np.random.choice(len(action_probs_amplified), p=action_probs_amplified)
@@ -63,12 +68,15 @@ def stochastic_amplified_selection(actions_probs, alpha=8.0):
          choice = 6
      return choice

+
  def stochastic_selection(actions_probs):
      return np.random.choice(len(actions_probs), p=actions_probs)

+
  def greedy_selection(actions_probs):
      return np.argmax(actions_probs)

+
  def measure_average_sequence_distance(seq1, seq2):
      """Measures the sequence similarity between two sequences of observations and actions.

@@ -82,19 +90,22 @@ def measure_average_sequence_distance(seq1, seq2):

      # Ensure both sequences have the same length
      min_seq_len = np.min([len(seq1), len(seq2)])
-     assert np.max([len(seq1), len(seq2)]) <= 30*min_seq_len, "We can't really measure similarity in case the sequences are really not the same... maybe just return a default NOT_SIMILAR here."
+     assert (
+         np.max([len(seq1), len(seq2)]) <= 30 * min_seq_len
+     ), "We can't really measure similarity in case the sequences are really not the same... maybe just return a default NOT_SIMILAR here."

      # Calculate the Euclidean distance between corresponding elements in the sequences
      distances = []
      for i in range(0, min_seq_len):
-         distances.append(np.sum(np.abs(np.array(seq1[i])-np.array(seq2[i]))))
+         distances.append(np.sum(np.abs(np.array(seq1[i]) - np.array(seq2[i]))))

      # Calculate the average distance over all elements
      return np.mean(np.array(distances))


- def traj_to_policy(observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.) -> Dict[
-     str, List[float]]:
+ def traj_to_policy(
+     observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.0
+ ) -> Dict[str, List[float]]:
      # converts a trajectory from a planner to a policy
      # where the taken action has 99.99999% probability
      trajectory_as_policy = {}
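
The per-step term in measure_average_sequence_distance is an L1 distance between corresponding elements; a tiny worked example with illustrative three-step sequences:

    import numpy as np

    seq1 = [[0, 0], [1, 1], [2, 2]]
    seq2 = [[0, 1], [1, 3], [2, 2]]
    per_step = [np.sum(np.abs(np.array(a) - np.array(b))) for a, b in zip(seq1, seq2)]
    print(np.mean(per_step))  # (1 + 2 + 0) / 3 = 1.0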
@@ -104,22 +115,28 @@ def traj_to_policy(observations: List[Tuple[State, Any]], actions: Discrete, eps

          actions_len = actions.n
          qs = [1e-6 + epsilon / actions_len for _ in range(actions_len)]
-         qs[action_index] = 1. - 1e-6 * (actions_len - 1) - epsilon
+         qs[action_index] = 1.0 - 1e-6 * (actions_len - 1) - epsilon

-         state = observation['image']
+         state = observation["image"]
          state_pickled = dill.dumps(state)
          trajectory_as_policy[state_pickled] = qs
      return trajectory_as_policy

- def pass_observation_patcher(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
+
+ def pass_observation_patcher(
+     observations: List[Any], agent: RLAgent
+ ) -> Generator[None, None, None]:
      for observation in observations:
          yield observation

+
  def mean_wasserstein_distance(
-     observations: List[Tuple[State, Any]],
-     agent: DeepRLAgent,
-     actions: gymnasium.spaces.Box,
-     observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
+     observations: List[Tuple[State, Any]],
+     agent: DeepRLAgent,
+     actions: gymnasium.spaces.Box,
+     observation_patcher: Callable[
+         [List[Any], RLAgent], Generator[None, None, None]
+     ] = pass_observation_patcher,
  ):
      distances = []

@@ -141,22 +158,28 @@ def mean_wasserstein_distance(
              wasserstein_distances.append(
                  wasserstein_distance([observation_action], [actor_mean])
              )
-         distances.append(mean(wasserstein_distances))
-     return mean(distances)
+         distances.append(np.mean(wasserstein_distances))
+     return np.mean(distances)


- def mean_action_distance_continuous(observations: List[Tuple[State, Any]], agent: DeepRLAgent, actions: gymnasium.spaces.Box):
+ def mean_action_distance_continuous(
+     observations: List[Tuple[State, Any]],
+     agent: DeepRLAgent,
+     actions: gymnasium.spaces.Box,
+ ):
      distances = []
      for observation, action in observations:
          action2, _ = agent.model.predict(
              observation,
              state=None,
              deterministic=True,
-             episode_start=np.ones((1,), dtype=bool)
+             episode_start=np.ones((1,), dtype=bool),
          )
          action_arr, action2_arr = action[0], action2[0]
          print(f"actor means:{action2}")
-         assert len(action_arr) == len(action2_arr), f"Actions should be on the same length:{action},{action2}"
+         assert len(action_arr) == len(
+             action2_arr
+         ), f"Actions should be on the same length:{action},{action2}"

          total_diff = 0
          # total_diff = []
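
The inner loop in mean_wasserstein_distance above compares a single observed action component against a single actor mean, in which case SciPy's Wasserstein distance reduces to the absolute difference:

    from scipy.stats import wasserstein_distance

    print(wasserstein_distance([0.3], [0.8]))  # 0.5, i.e. |0.3 - 0.8|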
@@ -165,24 +188,29 @@ def mean_action_distance_continuous(observations: List[Tuple[State, Any]], agent
          # distances.append(statistics.mean(total_diff))
          distances.append(total_diff)
      # print(f"distances:{distances}")
-     return mean(distances)
+     return np.mean(distances)


- def set_agent_goal_observation(observations: List[Any], agent: RLAgent) -> Generator[None, None, None]:
+ def set_agent_goal_observation(
+     observations: List[Any], agent: RLAgent
+ ) -> Generator[None, None, None]:
      copy_observation = observations.copy()
      for observation, action in copy_observation:
-         observation['desired_goal'] = agent.goal
+         observation["desired_goal"] = agent.goal
          yield observation, action


  def z_score(x, mean_action: float, std_dev: float):
      return (x - mean_action) / std_dev

+
  def mean_p_value(
-     observations: List[Tuple[State, Any]],
-     agent: DeepRLAgent,
-     actions: gymnasium.spaces.Box,
-     observation_patcher: Callable[[List[Any], RLAgent], Generator[None, None, None]] = pass_observation_patcher
+     observations: List[Tuple[State, Any]],
+     agent: DeepRLAgent,
+     actions: gymnasium.spaces.Box,
+     observation_patcher: Callable[
+         [List[Any], RLAgent], Generator[None, None, None]
+     ] = pass_observation_patcher,
  ):
      distances = []
      for observation, observed_action in observation_patcher(observations, agent):
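
mean_p_value averages absolute z-scores per action dimension, using 2**|log_std| as the spread passed to z_score; the numbers below are illustrative:

    import math

    def z_score(x, mean_action, std_dev):
        # same logic as the helper in the hunk above
        return (x - mean_action) / std_dev

    observed, actor_mean, log_std = 0.8, 0.5, -1.0
    print(math.fabs(z_score(observed, actor_mean, math.pow(2, math.fabs(log_std)))))  # 0.15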
@@ -194,25 +222,39 @@ def mean_p_value(
          observed_actions = observed_action[0]
          log_std_dev = log_std_dev[0]

-         if len(actor_means) != len(observed_actions) or len(actor_means) != len(log_std_dev) or len(observed_actions) != len(log_std_dev):
+         if (
+             len(actor_means) != len(observed_actions)
+             or len(actor_means) != len(log_std_dev)
+             or len(observed_actions) != len(log_std_dev)
+         ):
              raise Exception(
                  f"Length of observed actions, actor mean and std-dev should be equal! "
                  f"{len(observed_actions)},{len(actor_means)},{len(log_std_dev)}"
              )
          z_scores = []
-         for actor_mean, observation_action, action_log_std_dev in zip(actor_means, observed_actions, log_std_dev):
+         for actor_mean, observation_action, action_log_std_dev in zip(
+             actor_means, observed_actions, log_std_dev
+         ):
              z_scores.append(
-                 math.fabs(z_score(observation_action, actor_mean, math.pow(2, math.fabs(action_log_std_dev))))
+                 math.fabs(
+                     z_score(
+                         observation_action,
+                         actor_mean,
+                         math.pow(2, math.fabs(action_log_std_dev)),
+                     )
+                 )
              )
-         mean_distances = mean(z_scores)
+         mean_distances = np.mean(z_scores)

          distances.append(mean_distances)
-     return mean(distances)
+     return np.mean(distances)
+

  def normalize(values: List[float]) -> List[float]:
      values /= sum(values)
      return values

+
  def max(values: List[float]) -> List[float]:
      if not len(values):
          return values
@@ -220,4 +262,4 @@ def max(values: List[float]) -> List[float]:
      argmax = vals.argmax()
      vals[:] = 0.0
      vals[argmax] = 1.0
-     return vals
+     return vals
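
For reference, normalize() rescales a score vector to sum to one and the module-level max() (which shadows the builtin) one-hots the argmax; a NumPy sketch of the same operations on illustrative scores:

    import numpy as np

    vals = np.array([0.2, 0.7, 0.1])
    probs = vals / vals.sum()        # what normalize() computes
    one_hot = np.zeros_like(probs)
    one_hot[probs.argmax()] = 1.0    # the one-hot vector max() returns
    print(probs, one_hot)            # [0.2 0.7 0.1] [0. 1. 0.]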
gr_libs/ml/__init__.py CHANGED
@@ -1,6 +1,8 @@
  from ..ml.utils import device, seed, synthesize
+
  # from ml.neural import PPOAlgo
  from ..ml.tabular import TabularQLearner
+
  # from ml.neural import ACModel, RecurrentACModel
  from ..ml.neural import DictList
  from ..ml.agent import Agent
gr_libs/ml/agent.py CHANGED
@@ -2,6 +2,7 @@ import torch

  from gr_libs.ml import utils
  from gr_libs.ml.utils.other import device
+
  # from ml.neural import ACModel


@@ -12,15 +13,27 @@ class Agent:
      - to choose an action given an observation,
      - to analyze the feedback (i.e. reward and done state) of its action."""

-     def __init__(self, obs_space, action_space, model_dir,
-                  argmax=False, num_envs=1, use_memory=True, use_text=False):
+     def __init__(
+         self,
+         obs_space,
+         action_space,
+         model_dir,
+         argmax=False,
+         num_envs=1,
+         use_memory=True,
+         use_text=False,
+     ):
          obs_space, self.preprocess_obss = utils.get_obss_preprocessor(obs_space)
-         self.acmodel = ACModel(obs_space, action_space, use_memory=use_memory, use_text=use_text)
+         self.acmodel = ACModel(
+             obs_space, action_space, use_memory=use_memory, use_text=use_text
+         )
          self.argmax = argmax
          self.num_envs = num_envs

          if self.acmodel.recurrent:
-             self.memories = torch.zeros(self.num_envs, self.acmodel.memory_size, device=device)
+             self.memories = torch.zeros(
+                 self.num_envs, self.acmodel.memory_size, device=device
+             )

          self.acmodel.load_state_dict(utils.get_model_state(model_dir))
          self.acmodel.to(device)
@@ -49,8 +62,10 @@ class Agent:

      def analyze_feedbacks(self, rewards, dones):
          if self.acmodel.recurrent:
-             masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(1)
+             masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(
+                 1
+             )
              self.memories *= masks

      def analyze_feedback(self, reward, done):
-         return self.analyze_feedbacks([reward], [done])
+         return self.analyze_feedbacks([reward], [done])
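
The wrapped unsqueeze(1) call above builds a per-environment mask from the done flags so that finished environments get their recurrent memories zeroed; a small illustration with made-up shapes:

    import torch

    memories = torch.ones(2, 4)  # num_envs=2, memory_size=4
    dones = [True, False]
    masks = 1 - torch.tensor(dones, dtype=torch.float).unsqueeze(1)  # shape (2, 1)
    memories *= masks
    print(memories)  # first row zeroed, second row unchanged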
gr_libs/ml/base/__init__.py CHANGED
@@ -1 +1 @@
- from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
+ from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
gr_libs/ml/base/rl_agent.py CHANGED
@@ -4,22 +4,24 @@ import numpy as np

  State = Any

+
  class ContextualAgent:
      def __init__(self, problem_name, problem_goal, agent):
          self.problem_name = problem_name
          self.problem_goal = problem_goal
          self.agent = agent

+
  class RLAgent(ABC):
      def __init__(
-         self,
-         episodes: int,
-         decaying_eps: bool,
-         epsilon: float,
-         learning_rate: float,
-         gamma: float,
-         problem_name: str,
-         domain_name: str
+         self,
+         episodes: int,
+         decaying_eps: bool,
+         epsilon: float,
+         learning_rate: float,
+         gamma: float,
+         problem_name: str,
+         domain_name: str,
      ):
          self.episodes = episodes
          self.decaying_eps = decaying_eps
@@ -46,9 +48,10 @@ class RLAgent(ABC):

      def update_states_counter(self, observation_str: str):
          if observation_str in self.states_counter:
-             self.states_counter[observation_str] = self.states_counter[observation_str] + 1
+             self.states_counter[observation_str] = (
+                 self.states_counter[observation_str] + 1
+             )
          else:
              self.states_counter[observation_str] = 1
          if len(self.states_counter) % 10000 == 0:
              print(f"probably error to many {len(self.states_counter)}")
-
gr_libs/ml/consts.py CHANGED
@@ -19,4 +19,4 @@ OPTIM_EPS = 1e-8
  OPTIM_ALPHA = 0.99
  CLIP_EPS = 0.2
  RECURRENCE = 1
- TEXT = False
+ TEXT = False