gr-libs 0.1.7.post0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they were published to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- gr_libs/__init__.py +4 -1
- gr_libs/_evaluation/__init__.py +1 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +260 -0
- gr_libs/_evaluation/_generate_experiments_results.py +141 -0
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +497 -0
- gr_libs/_evaluation/_get_plans_images.py +61 -0
- gr_libs/_evaluation/_increasing_and_decreasing_.py +106 -0
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +294 -0
- gr_libs/environment/__init__.py +30 -9
- gr_libs/environment/_utils/utils.py +27 -0
- gr_libs/environment/environment.py +417 -54
- gr_libs/metrics/__init__.py +7 -0
- gr_libs/metrics/metrics.py +231 -54
- gr_libs/ml/__init__.py +2 -5
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +3 -1
- gr_libs/ml/base/rl_agent.py +81 -13
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/__init__.py +1 -3
- gr_libs/ml/neural/deep_rl_learner.py +619 -378
- gr_libs/ml/neural/utils/__init__.py +1 -2
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +1 -1
- gr_libs/ml/planner/mcts/{utils → _utils}/node.py +11 -7
- gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +15 -11
- gr_libs/ml/planner/mcts/mcts_model.py +571 -312
- gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs/ml/sequential/_lstm_model.py +270 -0
- gr_libs/ml/tabular/__init__.py +1 -3
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +150 -82
- gr_libs/ml/tabular/tabular_rl_agent.py +42 -28
- gr_libs/ml/utils/__init__.py +2 -3
- gr_libs/ml/utils/format.py +28 -97
- gr_libs/ml/utils/math.py +5 -3
- gr_libs/ml/utils/other.py +3 -3
- gr_libs/ml/utils/storage.py +88 -81
- gr_libs/odgr_executor.py +268 -0
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/_utils/__init__.py +0 -0
- gr_libs/recognizer/_utils/format.py +18 -0
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +233 -88
- gr_libs/recognizer/graml/_gr_dataset.py +233 -0
- gr_libs/recognizer/graml/graml_recognizer.py +586 -252
- gr_libs/recognizer/recognizer.py +90 -30
- gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- gr_libs/tutorials/gcdraco_panda_tutorial.py +62 -0
- gr_libs/tutorials/gcdraco_parking_tutorial.py +57 -0
- gr_libs/tutorials/graml_minigrid_tutorial.py +64 -0
- gr_libs/tutorials/graml_panda_tutorial.py +57 -0
- gr_libs/tutorials/graml_parking_tutorial.py +52 -0
- gr_libs/tutorials/graml_point_maze_tutorial.py +60 -0
- gr_libs/tutorials/graql_minigrid_tutorial.py +50 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
- gr_libs-0.2.2.dist-info/RECORD +71 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
- gr_libs-0.2.2.dist-info/top_level.txt +2 -0
- tests/test_draco.py +14 -0
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +12 -8
- tests/test_graql.py +3 -2
- evaluation/analyze_results_cross_alg_cross_domain.py +0 -277
- evaluation/create_minigrid_map_image.py +0 -34
- evaluation/file_system.py +0 -42
- evaluation/generate_experiments_results.py +0 -92
- evaluation/generate_experiments_results_new_ver1.py +0 -254
- evaluation/generate_experiments_results_new_ver2.py +0 -331
- evaluation/generate_task_specific_statistics_plots.py +0 -272
- evaluation/get_plans_images.py +0 -47
- evaluation/increasing_and_decreasing_.py +0 -63
- gr_libs/environment/utils/utils.py +0 -17
- gr_libs/ml/neural/utils/penv.py +0 -57
- gr_libs/ml/sequential/lstm_model.py +0 -192
- gr_libs/recognizer/graml/gr_dataset.py +0 -134
- gr_libs/recognizer/utils/__init__.py +0 -1
- gr_libs/recognizer/utils/format.py +0 -13
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- gr_libs-0.1.7.post0.dist-info/top_level.txt +0 -4
- tutorials/graml_minigrid_tutorial.py +0 -34
- tutorials/graml_panda_tutorial.py +0 -41
- tutorials/graml_parking_tutorial.py +0 -39
- tutorials/graml_point_maze_tutorial.py +0 -39
- tutorials/graql_minigrid_tutorial.py +0 -34
- /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
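Several of the renames above move helper packages behind leading-underscore names (`evaluation/` → `gr_libs/_evaluation/`, `recognizer/utils` → `recognizer/_utils`, `mcts/utils` → `mcts/_utils`) and relocate the tutorials from a top-level `tutorials/` directory into `gr_libs/tutorials/`. A minimal sketch of how import paths shift, assuming the modules are importable exactly as listed in the file tree (old paths shown as comments):

```python
# Import-path sketch for the 0.2.2 layout; paths are taken from the file
# listing above, not from package documentation.

# 0.1.7.post0 (top-level tutorials/, public helper subpackages):
#   import tutorials.graml_minigrid_tutorial
#   import gr_libs.recognizer.utils.format
#   import gr_libs.ml.planner.mcts.utils.node

# 0.2.2 (tutorials shipped inside the package, helpers made private):
import gr_libs.tutorials.graml_minigrid_tutorial
import gr_libs.recognizer._utils.format
import gr_libs.ml.planner.mcts._utils.node
```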
gr_libs/ml/tabular/tabular_q_learner.py
CHANGED
@@ -1,19 +1,18 @@
-
+""" implementation of q-learning """
+
 import os.path
 import pickle
-import
+from collections.abc import Iterable
+from random import Random
 from types import MethodType
+from typing import Any

 import dill
-from gymnasium import register
 import numpy as np
-
-from tqdm import tqdm
-from typing import Any
-from random import Random
-from typing import List, Iterable
 from gymnasium.error import InvalidAction
-from
+from tqdm import tqdm
+
+from gr_libs.environment.environment import QLEARNING, EnvProperty
 from gr_libs.ml.tabular import TabularState
 from gr_libs.ml.tabular.tabular_rl_agent import TabularRLAgent
 from gr_libs.ml.utils import get_agent_model_dir, random_subset_with_order, softmax
@@ -27,21 +26,42 @@ class TabularQLearner(TabularRLAgent):
 MODEL_FILE_NAME = r"tabular_model.txt"
 CONF_FILE = r"conf.pkl"

-def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def __init__(
+self,
+domain_name: str,
+problem_name: str,
+env_prop: EnvProperty,
+algorithm: str,
+num_timesteps: int,
+decaying_eps: bool = True,
+eps: float = 1.0,
+alpha: float = 0.5,
+decay: float = 0.000002,
+gamma: float = 0.9,
+rand: Random = Random(),
+learning_rate: float = 0.001,
+check_partial_goals: bool = True,
+valid_only: bool = False,
+):
+"""
+Initialize a TabularQLearner object.
+
+Args:
+domain_name (str): The name of the domain.
+problem_name (str): The name of the problem.
+env_prop (EnvProperty): The environment properties.
+algorithm (str): The algorithm to use.
+num_timesteps (int): The number of timesteps.
+decaying_eps (bool, optional): Whether to use decaying epsilon. Defaults to True.
+eps (float, optional): The initial epsilon value. Defaults to 1.0.
+alpha (float, optional): The learning rate. Defaults to 0.5.
+decay (float, optional): The decay rate. Defaults to 0.000002.
+gamma (float, optional): The discount factor. Defaults to 0.9.
+rand (Random, optional): The random number generator. Defaults to Random().
+learning_rate (float, optional): The learning rate. Defaults to 0.001.
+check_partial_goals (bool, optional): Whether to check partial goals. Defaults to True.
+valid_only (bool, optional): Whether to use valid goals only. Defaults to False.
+"""
 super().__init__(
 domain_name=domain_name,
 problem_name=problem_name,
@@ -52,14 +72,23 @@ class TabularQLearner(TabularRLAgent):
 decay=decay,
 gamma=gamma,
 rand=rand,
-learning_rate=learning_rate
+learning_rate=learning_rate,
 )
-assert
+assert (
+algorithm == QLEARNING
+), f"algorithm {algorithm} is not supported by {self.__class__.__name__}"
+self.env_prop = env_prop
 self.valid_only = valid_only
 self.check_partial_goals = check_partial_goals
 self.goal_literals_achieved = set()
-self.model_directory = get_agent_model_dir(
-
+self.model_directory = get_agent_model_dir(
+domain_name=domain_name,
+model_name=problem_name,
+class_name=self.class_name(),
+)
+self.model_file_path = os.path.join(
+self.model_directory, TabularQLearner.MODEL_FILE_NAME
+)
 self._conf_file = os.path.join(self.model_directory, TabularQLearner.CONF_FILE)

 self._learned_episodes = 0
@@ -73,12 +102,13 @@ class TabularQLearner(TabularRLAgent):
 print(f"Loading pre-existing conf file in {self._conf_file}")
 with open(self._conf_file, "rb") as f:
 conf = dill.load(file=f)
-self._learned_episodes = conf[
+self._learned_episodes = conf["learned_episodes"]

 # hyperparameters
 self.base_eps = eps
 self.patience = 400000
 if self.decaying_eps:
+
 def epsilon():
 self._c_eps = max((self.episodes - self.step) / self.episodes, 0.01)
 return self._c_eps
@@ -146,22 +176,22 @@ class TabularQLearner(TabularRLAgent):
 if not os.path.exists(directory):
 os.makedirs(directory)

-with open(path,
+with open(path, "wb") as f:
 pickle.dump(self.q_table, f)

 def load_q_table(self, path: str):
-with open(path,
+with open(path, "rb") as f:
 table = pickle.load(f)
 self.q_table = table

 def add_new_state(self, state: TabularState):
-self.q_table[str(state)] = [0.] * self.number_of_actions
+self.q_table[str(state)] = [0.0] * self.number_of_actions

-def get_all_q_values(self, state: TabularState) ->
+def get_all_q_values(self, state: TabularState) -> list[float]:
 if str(state) in self.q_table:
 return self.q_table[str(state)]
 else:
-return [0.] * self.number_of_actions
+return [0.0] * self.number_of_actions

 def best_action(self, state: TabularState) -> float:
 if str(state) not in self.q_table:
@@ -229,7 +259,7 @@ class TabularQLearner(TabularRLAgent):
 """
 old_q = self.get_q_value(self.last_state, self.last_action)

-td_error = -
+td_error = -old_q

 new_q = old_q + self.alpha * (reward + td_error)
 self.set_q_value(self.last_state, self.last_action, new_q)
@@ -244,14 +274,18 @@ class TabularQLearner(TabularRLAgent):
 if self._learned_episodes >= self.episodes:
 print("learned episodes is above the requsted episodes")
 return
-print(f
-tq = tqdm(
-
+print(f"Using {self.__class__.__name__}")
+tq = tqdm(
+range(self.episodes - self._learned_episodes),
+postfix=f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}",
+)
 for n in tq:
 self.step = n
 episode_r = 0
 observation, info = self.env.reset()
-tabular_state = TabularState.gen_tabular_state(
+tabular_state = TabularState.gen_tabular_state(
+environment=self.env, observation=observation
+)
 action = self.agent_start(state=tabular_state)

 self.update_states_counter(observation_str=str(tabular_state))
@@ -264,7 +298,9 @@ class TabularQLearner(TabularRLAgent):
 done_times += 1

 # standard q-learning algorithm
-next_tabular_state = TabularState.gen_tabular_state(
+next_tabular_state = TabularState.gen_tabular_state(
+environment=self.env, observation=observation
+)
 self.update_states_counter(observation_str=str(next_tabular_state))
 action = self.agent_step(reward, next_tabular_state)
 tstep += 1
@@ -277,13 +313,16 @@ class TabularQLearner(TabularRLAgent):
 max_r = episode_r
 # print("New all time high reward:", episode_r)
 tq.set_postfix_str(
-f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+)
 if (n + 1) % 100 == 0:
 tq.set_postfix_str(
-f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+)
 if (n + 1) % 1000 == 0:
 tq.set_postfix_str(
-f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+)
 if done_times <= 10:
 patience += 1
 if patience >= self.patience:
@@ -297,14 +336,18 @@ class TabularQLearner(TabularRLAgent):
 done_times = 0
 self.goal_literals_achieved.clear()

-print(
+print(
+f"number of unique states found during training:{self.get_number_of_unique_states()}"
+)
 print("finish learning and saving status")
 self.save_models_to_files()

 def exploit(self, number_of_steps=20):
 observation, info = self.env.reset()
 for step_number in range(number_of_steps):
-tabular_state = TabularState.gen_tabular_state(
+tabular_state = TabularState.gen_tabular_state(
+environment=self.env, observation=observation
+)
 action = self.policy(state=tabular_state)
 observation, reward, terminated, truncated, _ = self.env.step(action)
 done = terminated | truncated
@@ -314,16 +357,18 @@ class TabularQLearner(TabularRLAgent):

 def get_actions_probabilities(self, observation):
 obs, agent_pos = observation
-direction = obs[
+direction = obs["direction"]

 x, y = agent_pos
-tabular_state = TabularState(
+tabular_state = TabularState(
+agent_x_position=x, agent_y_position=y, agent_direction=direction
+)
 return softmax(self.get_all_q_values(tabular_state))

 def get_q_of_specific_cell(self, cell_key):
 cell_q_table = {}
 for i in range(4):
-key = cell_key +
+key = cell_key + ":" + str(i)
 if key in self.q_table:
 cell_q_table[key] = self.q_table[key]
 return cell_q_table
@@ -331,15 +376,14 @@ class TabularQLearner(TabularRLAgent):
 def get_all_cells(self):
 cells = set()
 for key in self.q_table.keys():
-cell = key.split(
+cell = key.split(":")[0]
 cells.add(cell)
 return list(cells)

-
 def _save_conf_file(self):
 conf = {
-
-
+"learned_episodes": self._learned_episodes,
+"states_counter": self.states_counter,
 }
 with open(self._conf_file, "wb") as f:
 dill.dump(conf, f)
@@ -347,11 +391,20 @@ class TabularQLearner(TabularRLAgent):
 def save_models_to_files(self):
 self.save_q_table(path=self.model_file_path)
 self._save_conf_file()
-
+
 def simplify_observation(self, observation):
-return [
-
-
+return [
+(obs["direction"], agent_pos_x, agent_pos_y, action)
+for ((obs, (agent_pos_x, agent_pos_y)), action) in observation
+] # list of tuples, each tuple the sample
+
+def generate_observation(
+self,
+action_selection_method: MethodType,
+random_optimalism,
+save_fig=False,
+fig_path: str = None,
+):
 """
 Generate a single observation given a list of agents

@@ -363,26 +416,32 @@ class TabularQLearner(TabularRLAgent):
 list: A list of state-action pairs representing the generated observation.

 Notes:
-The function randomly selects an agent from the given list and generates a sequence of state-action pairs
-based on the Q-table of the selected agent. The action selection is stochastic, where each action is
+The function randomly selects an agent from the given list and generates a sequence of state-action pairs
+based on the Q-table of the selected agent. The action selection is stochastic, where each action is
 selected based on the probability distribution defined by the Q-values in the Q-table.

-The generated sequence terminates when a maximum number of steps is reached or when the environment
+The generated sequence terminates when a maximum number of steps is reached or when the environment
 episode terminates.
 """
 if save_fig == False:
-assert
+assert (
+fig_path == None
+), "You can't specify a vid path when you don't even save the figure."
 else:
-assert
+assert (
+fig_path != None
+), "You must specify a vid path when you save the figure."
 obs, _ = self.env.reset()
 MAX_STEPS = 32
 done = False
 steps = []
 for step_index in range(MAX_STEPS):
 x, y = self.env.unwrapped.agent_pos
-str_state = "({},{}):{}".format(x, y, obs[
+str_state = "({},{}):{}".format(x, y, obs["direction"])
 relevant_actions_idx = 3
-action_probs = self.q_table[str_state][:relevant_actions_idx] / np.sum(
+action_probs = self.q_table[str_state][:relevant_actions_idx] / np.sum(
+self.q_table[str_state][:relevant_actions_idx]
+) # Normalize probabilities
 if step_index == 0 and random_optimalism:
 # print("in 1st step in generating plan and got random optimalism.")
 std_dev = np.std(action_probs)
@@ -398,7 +457,8 @@ class TabularQLearner(TabularRLAgent):
 assert reward >= 0
 action = 2
 step_index += 1
-else:
+else:
+action = action_selection_method(action_probs)
 else:
 action = action_selection_method(action_probs)
 steps.append(((obs, self.env.unwrapped.agent_pos), action))
@@ -408,16 +468,26 @@ class TabularQLearner(TabularRLAgent):
 if done:
 break

-#assert len(steps) >= 2
+# assert len(steps) >= 2
 if save_fig:
 sequence = [pos for ((state, pos), action) in steps]
-#print(f"sequence to {self.problem_name} is:\n\t{steps}\ngenerating image at {img_path}.")
+# print(f"sequence to {self.problem_name} is:\n\t{steps}\ngenerating image at {img_path}.")
 print(f"generating sequence image at {fig_path}.")
-env_prop.create_sequence_image(
+self.env_prop.create_sequence_image(
+sequence, fig_path, self.problem_name
+) # TODO change that assumption, cannot assume this is minigrid env

 return steps

-def generate_partial_observation(
+def generate_partial_observation(
+self,
+action_selection_method: MethodType,
+percentage: float,
+save_fig=False,
+is_consecutive=True,
+random_optimalism=True,
+fig_path=None,
+):
 """
 Generate a single observation given a list of agents

@@ -429,25 +499,23 @@ class TabularQLearner(TabularRLAgent):
 list: A list of state-action pairs representing the generated observation.

 Notes:
-The function randomly selects an agent from the given list and generates a sequence of state-action pairs
-based on the Q-table of the selected agent. The action selection is stochastic, where each action is
+The function randomly selects an agent from the given list and generates a sequence of state-action pairs
+based on the Q-table of the selected agent. The action selection is stochastic, where each action is
 selected based on the probability distribution defined by the Q-values in the Q-table.

-The generated sequence terminates when a maximum number of steps is reached or when the environment
+The generated sequence terminates when a maximum number of steps is reached or when the environment
 episode terminates.
 """

-steps = self.generate_observation(
-
+steps = self.generate_observation(
+action_selection_method=action_selection_method,
+random_optimalism=random_optimalism,
+save_fig=save_fig,
+fig_path=fig_path,
+) # steps are a full observation
+result = random_subset_with_order(
+steps, (int)(percentage * len(steps)), is_consecutive
+)
 if percentage >= 0.8:
 assert len(result) > 2
 return result
-
-if __name__ == "__main__":
-from gr_libs.metrics.metrics import greedy_selection
-import gr_envs # to register everything
-agent = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-LavaCrossingS9N2-DynamicGoal-1x7-v0")
-agent.generate_observation(greedy_selection, True, True)
-
-# python experiments.py --recognizer graml --domain point_maze --task L5 --partial_obs_type continuing --point_maze_env obstacles --collect_stats --inference_same_seq_len
-
gr_libs/ml/tabular/tabular_rl_agent.py
CHANGED
@@ -1,11 +1,11 @@
-import gymnasium as gym
 from abc import abstractmethod
-from typing import Collection, Literal, Any
 from random import Random
+from typing import Any
+
+import gymnasium as gym
 import numpy as np

-from gr_libs.ml.base import RLAgent
-from gr_libs.ml.base import State
+from gr_libs.ml.base import RLAgent, State


 class TabularRLAgent(RLAgent):
@@ -15,18 +15,37 @@ class TabularRLAgent(RLAgent):
 recommended as development goes on.
 """

-def __init__(
-
-
-
-
-
-
-
-
-
-
-
+def __init__(
+self,
+domain_name: str,
+problem_name: str,
+episodes: int,
+decaying_eps: bool,
+eps: float,
+alpha: float,
+decay: float,
+gamma: float,
+rand: Random,
+learning_rate,
+):
+"""
+Initializes a TabularRLAgent object.
+
+Args:
+domain_name (str): The name of the domain.
+problem_name (str): The name of the problem.
+episodes (int): The number of episodes to run.
+decaying_eps (bool): Whether to use decaying epsilon.
+eps (float): The initial epsilon value.
+alpha (float): The learning rate.
+decay (float): The decay rate for epsilon.
+gamma (float): The discount factor.
+rand (Random): The random number generator.
+learning_rate: The learning rate.
+
+Returns:
+None
+"""
 super().__init__(
 episodes=episodes,
 decaying_eps=decaying_eps,
@@ -34,7 +53,7 @@ class TabularRLAgent(RLAgent):
 learning_rate=learning_rate,
 gamma=gamma,
 domain_name=domain_name,
-problem_name=problem_name
+problem_name=problem_name,
 )
 self.env = gym.make(id=problem_name)
 self.actions = self.env.unwrapped.actions
@@ -59,7 +78,6 @@ class TabularRLAgent(RLAgent):
 Returns:
 (int) the first action the agent takes.
 """
-pass

 @abstractmethod
 def agent_step(self, reward: float, state: State) -> Any:
@@ -72,7 +90,6 @@ class TabularRLAgent(RLAgent):
 Returns:
 The action the agent is taking.
 """
-pass

 @abstractmethod
 def agent_end(self, reward: float) -> Any:
@@ -82,18 +99,16 @@ class TabularRLAgent(RLAgent):
 reward (float): the reward the agent received for entering the
 terminal state.
 """
-pass

 @abstractmethod
 def policy(self, state: State) -> Any:
 """The action for the specified state under the currently learned policy
-
-
-
-
-
+(unlike agent_step, this does not update the policy using state as a sample.
+Args:
+state (Any): the state observation from the environment
+Returns:
+The action prescribed for that state
 """
-pass

 @abstractmethod
 def softmax_policy(self, state: State) -> np.array:
@@ -105,7 +120,6 @@ class TabularRLAgent(RLAgent):
 Returns:
 np.array: probability of taking each action in self.actions given a state
 """
-pass

 @abstractmethod
 def learn(self, init_threshold: int = 20):
@@ -122,5 +136,5 @@ class TabularRLAgent(RLAgent):

 Returns:
 Any: [description]
-"""""
+""" ""
 return self.softmax_policy(state)
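Across these hunks, `TabularRLAgent` keeps the same abstract surface but drops the redundant `pass` statements from its docstring-only abstract methods and gains a documented constructor. A skeleton of what a concrete subclass still has to provide, with signatures copied from the hunks above (method bodies are placeholders, not package code; `agent_start` is shown without annotations because the diff does not include them):

```python
from typing import Any

import numpy as np

from gr_libs.ml.base import State
from gr_libs.ml.tabular.tabular_rl_agent import TabularRLAgent


class MyTabularAgent(TabularRLAgent):
    def agent_start(self, state):  # first action of an episode
        raise NotImplementedError

    def agent_step(self, reward: float, state: State) -> Any:
        raise NotImplementedError

    def agent_end(self, reward: float) -> Any:
        raise NotImplementedError

    def policy(self, state: State) -> Any:
        raise NotImplementedError

    def softmax_policy(self, state: State) -> np.array:
        raise NotImplementedError

    def learn(self, init_threshold: int = 20):
        raise NotImplementedError
```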
gr_libs/ml/utils/__init__.py
CHANGED
@@ -1,6 +1,5 @@
-#from .agent import *
 from .env import make_env
-from .format import
+from .format import random_subset_with_order
+from .math import softmax
 from .other import device, seed, synthesize
 from .storage import *
-from .math import softmax