gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
- evaluation/create_minigrid_map_image.py +10 -6
- evaluation/file_system.py +16 -5
- evaluation/generate_experiments_results.py +123 -74
- evaluation/generate_experiments_results_new_ver1.py +227 -243
- evaluation/generate_experiments_results_new_ver2.py +317 -317
- evaluation/generate_task_specific_statistics_plots.py +481 -253
- evaluation/get_plans_images.py +41 -26
- evaluation/increasing_and_decreasing_.py +97 -56
- gr_libs/__init__.py +2 -1
- gr_libs/_version.py +2 -2
- gr_libs/environment/__init__.py +16 -8
- gr_libs/environment/environment.py +167 -39
- gr_libs/environment/utils/utils.py +22 -12
- gr_libs/metrics/__init__.py +5 -0
- gr_libs/metrics/metrics.py +76 -34
- gr_libs/ml/__init__.py +2 -0
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +1 -1
- gr_libs/ml/base/rl_agent.py +13 -10
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/deep_rl_learner.py +433 -352
- gr_libs/ml/neural/utils/__init__.py +1 -1
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/neural/utils/penv.py +5 -2
- gr_libs/ml/planner/mcts/mcts_model.py +524 -302
- gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
- gr_libs/ml/planner/mcts/utils/node.py +11 -7
- gr_libs/ml/planner/mcts/utils/tree.py +14 -10
- gr_libs/ml/sequential/__init__.py +1 -1
- gr_libs/ml/sequential/lstm_model.py +256 -175
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +123 -73
- gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
- gr_libs/ml/utils/__init__.py +8 -2
- gr_libs/ml/utils/format.py +78 -70
- gr_libs/ml/utils/math.py +2 -1
- gr_libs/ml/utils/other.py +1 -1
- gr_libs/ml/utils/storage.py +88 -28
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
- gr_libs/recognizer/graml/gr_dataset.py +209 -110
- gr_libs/recognizer/graml/graml_recognizer.py +431 -240
- gr_libs/recognizer/recognizer.py +38 -27
- gr_libs/recognizer/utils/__init__.py +1 -1
- gr_libs/recognizer/utils/format.py +8 -3
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
- gr_libs-0.1.8.dist-info/RECORD +70 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +8 -4
- tests/test_graql.py +2 -1
- tutorials/gcdraco_panda_tutorial.py +66 -0
- tutorials/gcdraco_parking_tutorial.py +61 -0
- tutorials/graml_minigrid_tutorial.py +42 -12
- tutorials/graml_panda_tutorial.py +35 -14
- tutorials/graml_parking_tutorial.py +37 -20
- tutorials/graml_point_maze_tutorial.py +33 -13
- tutorials/graql_minigrid_tutorial.py +31 -15
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/ml/tabular/tabular_q_learner.py
CHANGED
@@ -13,7 +13,7 @@ from typing import Any
 from random import Random
 from typing import List, Iterable
 from gymnasium.error import InvalidAction
-from gr_libs.environment.environment import QLEARNING,
+from gr_libs.environment.environment import QLEARNING, EnvProperty
 from gr_libs.ml.tabular import TabularState
 from gr_libs.ml.tabular.tabular_rl_agent import TabularRLAgent
 from gr_libs.ml.utils import get_agent_model_dir, random_subset_with_order, softmax
@@ -27,21 +27,23 @@ class TabularQLearner(TabularRLAgent):
     MODEL_FILE_NAME = r"tabular_model.txt"
     CONF_FILE = r"conf.pkl"

-    def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def __init__(
+        self,
+        domain_name: str,
+        problem_name: str,
+        env_prop: EnvProperty,
+        algorithm: str,
+        num_timesteps: int,
+        decaying_eps: bool = True,
+        eps: float = 1.0,
+        alpha: float = 0.5,
+        decay: float = 0.000002,
+        gamma: float = 0.9,
+        rand: Random = Random(),
+        learning_rate: float = 0.001,
+        check_partial_goals: bool = True,
+        valid_only: bool = False,
+    ):
         super().__init__(
             domain_name=domain_name,
             problem_name=problem_name,
@@ -52,14 +54,23 @@ class TabularQLearner(TabularRLAgent):
             decay=decay,
             gamma=gamma,
             rand=rand,
-            learning_rate=learning_rate
+            learning_rate=learning_rate,
         )
-        assert
+        assert (
+            algorithm == QLEARNING
+        ), f"algorithm {algorithm} is not supported by {self.__class__.__name__}"
+        self.env_prop = env_prop
         self.valid_only = valid_only
         self.check_partial_goals = check_partial_goals
         self.goal_literals_achieved = set()
-        self.model_directory = get_agent_model_dir(
-
+        self.model_directory = get_agent_model_dir(
+            domain_name=domain_name,
+            model_name=problem_name,
+            class_name=self.class_name(),
+        )
+        self.model_file_path = os.path.join(
+            self.model_directory, TabularQLearner.MODEL_FILE_NAME
+        )
         self._conf_file = os.path.join(self.model_directory, TabularQLearner.CONF_FILE)

         self._learned_episodes = 0
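For orientation, a minimal sketch of how the reworked constructor might now be called. The domain and problem names are taken from the `__main__` block removed later in this diff; `MinigridProperty`, its constructor, the timestep count, and the call as a whole are assumptions for illustration, not a verified gr_libs invocation.

```python
from random import Random

# QLEARNING and EnvProperty come from the import change above; MinigridProperty is assumed here.
from gr_libs.environment.environment import QLEARNING, MinigridProperty
from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner

problem = "MiniGrid-LavaCrossingS9N2-DynamicGoal-1x7-v0"
agent = TabularQLearner(
    domain_name="minigrid",
    problem_name=problem,
    env_prop=MinigridProperty(problem),  # assumed constructor; any EnvProperty instance for the domain
    algorithm=QLEARNING,                 # anything else now trips the added assert
    num_timesteps=300_000,               # illustrative value
    rand=Random(42),
)
```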
@@ -73,12 +84,13 @@ class TabularQLearner(TabularRLAgent):
             print(f"Loading pre-existing conf file in {self._conf_file}")
             with open(self._conf_file, "rb") as f:
                 conf = dill.load(file=f)
-                self._learned_episodes = conf[
+                self._learned_episodes = conf["learned_episodes"]

         # hyperparameters
         self.base_eps = eps
         self.patience = 400000
         if self.decaying_eps:
+
             def epsilon():
                 self._c_eps = max((self.episodes - self.step) / self.episodes, 0.01)
                 return self._c_eps
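The `epsilon()` closure above implements a linear decay over the episode index with a floor of 0.01. A standalone illustration of the schedule (the episode count here is made up):

```python
episodes = 100_000

def epsilon(step: int) -> float:
    # mirrors: self._c_eps = max((self.episodes - self.step) / self.episodes, 0.01)
    return max((episodes - step) / episodes, 0.01)

print(epsilon(0))       # 1.0  -> fully exploratory at the start
print(epsilon(75_000))  # 0.25
print(epsilon(99_500))  # 0.01 -> clamped at the floor
```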
@@ -146,22 +158,22 @@ class TabularQLearner(TabularRLAgent):
         if not os.path.exists(directory):
             os.makedirs(directory)

-        with open(path,
+        with open(path, "wb") as f:
             pickle.dump(self.q_table, f)

     def load_q_table(self, path: str):
-        with open(path,
+        with open(path, "rb") as f:
             table = pickle.load(f)
             self.q_table = table

     def add_new_state(self, state: TabularState):
-        self.q_table[str(state)] = [0.] * self.number_of_actions
+        self.q_table[str(state)] = [0.0] * self.number_of_actions

     def get_all_q_values(self, state: TabularState) -> List[float]:
         if str(state) in self.q_table:
             return self.q_table[str(state)]
         else:
-            return [0.] * self.number_of_actions
+            return [0.0] * self.number_of_actions

     def best_action(self, state: TabularState) -> float:
         if str(state) not in self.q_table:
@@ -229,7 +241,7 @@ class TabularQLearner(TabularRLAgent):
         """
         old_q = self.get_q_value(self.last_state, self.last_action)

-        td_error = -
+        td_error = -old_q

         new_q = old_q + self.alpha * (reward + td_error)
         self.set_q_value(self.last_state, self.last_action, new_q)
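With `td_error = -old_q` there is no bootstrapped next-state term, so the update reduces to `new_q = old_q + alpha * (reward - old_q)`: the target is just the immediate reward, which is typically the end-of-episode case. A quick numeric check with made-up values:

```python
alpha = 0.5
old_q = 0.4
reward = 1.0

td_error = -old_q                             # as in the hunk above
new_q = old_q + alpha * (reward + td_error)
print(new_q)                                  # 0.7 == 0.4 + 0.5 * (1.0 - 0.4)
```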
@@ -244,14 +256,18 @@ class TabularQLearner(TabularRLAgent):
         if self._learned_episodes >= self.episodes:
             print("learned episodes is above the requsted episodes")
             return
-        print(f
-        tq = tqdm(
-
+        print(f"Using {self.__class__.__name__}")
+        tq = tqdm(
+            range(self.episodes - self._learned_episodes),
+            postfix=f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}",
+        )
         for n in tq:
             self.step = n
             episode_r = 0
             observation, info = self.env.reset()
-            tabular_state = TabularState.gen_tabular_state(
+            tabular_state = TabularState.gen_tabular_state(
+                environment=self.env, observation=observation
+            )
             action = self.agent_start(state=tabular_state)

             self.update_states_counter(observation_str=str(tabular_state))
@@ -264,7 +280,9 @@ class TabularQLearner(TabularRLAgent):
                     done_times += 1

                 # standard q-learning algorithm
-                next_tabular_state = TabularState.gen_tabular_state(
+                next_tabular_state = TabularState.gen_tabular_state(
+                    environment=self.env, observation=observation
+                )
                 self.update_states_counter(observation_str=str(next_tabular_state))
                 action = self.agent_step(reward, next_tabular_state)
                 tstep += 1
@@ -277,13 +295,16 @@ class TabularQLearner(TabularRLAgent):
                 max_r = episode_r
                 # print("New all time high reward:", episode_r)
                 tq.set_postfix_str(
-                    f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+                    f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+                )
             if (n + 1) % 100 == 0:
                 tq.set_postfix_str(
-                    f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+                    f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+                )
             if (n + 1) % 1000 == 0:
                 tq.set_postfix_str(
-                    f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+                    f"States: {len(self.q_table.keys())}. Goals: {done_times}. Eps: {self._c_eps:.3f}. MaxR: {max_r}"
+                )
             if done_times <= 10:
                 patience += 1
                 if patience >= self.patience:
@@ -297,14 +318,18 @@ class TabularQLearner(TabularRLAgent):
                 done_times = 0
                 self.goal_literals_achieved.clear()

-        print(
+        print(
+            f"number of unique states found during training:{self.get_number_of_unique_states()}"
+        )
         print("finish learning and saving status")
         self.save_models_to_files()

     def exploit(self, number_of_steps=20):
         observation, info = self.env.reset()
         for step_number in range(number_of_steps):
-            tabular_state = TabularState.gen_tabular_state(
+            tabular_state = TabularState.gen_tabular_state(
+                environment=self.env, observation=observation
+            )
             action = self.policy(state=tabular_state)
             observation, reward, terminated, truncated, _ = self.env.step(action)
             done = terminated | truncated
@@ -314,16 +339,18 @@ class TabularQLearner(TabularRLAgent):

     def get_actions_probabilities(self, observation):
         obs, agent_pos = observation
-        direction = obs[
+        direction = obs["direction"]

         x, y = agent_pos
-        tabular_state = TabularState(
+        tabular_state = TabularState(
+            agent_x_position=x, agent_y_position=y, agent_direction=direction
+        )
         return softmax(self.get_all_q_values(tabular_state))

     def get_q_of_specific_cell(self, cell_key):
         cell_q_table = {}
         for i in range(4):
-            key = cell_key +
+            key = cell_key + ":" + str(i)
             if key in self.q_table:
                 cell_q_table[key] = self.q_table[key]
         return cell_q_table
@@ -331,15 +358,14 @@ class TabularQLearner(TabularRLAgent):
     def get_all_cells(self):
         cells = set()
         for key in self.q_table.keys():
-            cell = key.split(
+            cell = key.split(":")[0]
             cells.add(cell)
         return list(cells)

-
     def _save_conf_file(self):
         conf = {
-
-
+            "learned_episodes": self._learned_episodes,
+            "states_counter": self.states_counter,
         }
         with open(self._conf_file, "wb") as f:
             dill.dump(conf, f)
@@ -347,11 +373,20 @@ class TabularQLearner(TabularRLAgent):
     def save_models_to_files(self):
         self.save_q_table(path=self.model_file_path)
         self._save_conf_file()
-
+
     def simplify_observation(self, observation):
-        return [
-
-
+        return [
+            (obs["direction"], agent_pos_x, agent_pos_y, action)
+            for ((obs, (agent_pos_x, agent_pos_y)), action) in observation
+        ]  # list of tuples, each tuple the sample
+
+    def generate_observation(
+        self,
+        action_selection_method: MethodType,
+        random_optimalism,
+        save_fig=False,
+        fig_path: str = None,
+    ):
         """
         Generate a single observation given a list of agents

@@ -363,26 +398,32 @@ class TabularQLearner(TabularRLAgent):
             list: A list of state-action pairs representing the generated observation.

         Notes:
-            The function randomly selects an agent from the given list and generates a sequence of state-action pairs
-            based on the Q-table of the selected agent. The action selection is stochastic, where each action is
+            The function randomly selects an agent from the given list and generates a sequence of state-action pairs
+            based on the Q-table of the selected agent. The action selection is stochastic, where each action is
             selected based on the probability distribution defined by the Q-values in the Q-table.

-            The generated sequence terminates when a maximum number of steps is reached or when the environment
+            The generated sequence terminates when a maximum number of steps is reached or when the environment
            episode terminates.
         """
         if save_fig == False:
-            assert
+            assert (
+                fig_path == None
+            ), "You can't specify a vid path when you don't even save the figure."
         else:
-            assert
+            assert (
+                fig_path != None
+            ), "You must specify a vid path when you save the figure."
         obs, _ = self.env.reset()
         MAX_STEPS = 32
         done = False
         steps = []
         for step_index in range(MAX_STEPS):
             x, y = self.env.unwrapped.agent_pos
-            str_state = "({},{}):{}".format(x, y, obs[
+            str_state = "({},{}):{}".format(x, y, obs["direction"])
             relevant_actions_idx = 3
-            action_probs = self.q_table[str_state][:relevant_actions_idx] / np.sum(
+            action_probs = self.q_table[str_state][:relevant_actions_idx] / np.sum(
+                self.q_table[str_state][:relevant_actions_idx]
+            )  # Normalize probabilities
             if step_index == 0 and random_optimalism:
                 # print("in 1st step in generating plan and got random optimalism.")
                 std_dev = np.std(action_probs)
@@ -398,7 +439,8 @@ class TabularQLearner(TabularRLAgent):
                     assert reward >= 0
                     action = 2
                     step_index += 1
-                else:
+                else:
+                    action = action_selection_method(action_probs)
             else:
                 action = action_selection_method(action_probs)
             steps.append(((obs, self.env.unwrapped.agent_pos), action))
@@ -408,16 +450,26 @@ class TabularQLearner(TabularRLAgent):
             if done:
                 break

-        #assert len(steps) >= 2
+        # assert len(steps) >= 2
         if save_fig:
             sequence = [pos for ((state, pos), action) in steps]
-            #print(f"sequence to {self.problem_name} is:\n\t{steps}\ngenerating image at {img_path}.")
+            # print(f"sequence to {self.problem_name} is:\n\t{steps}\ngenerating image at {img_path}.")
             print(f"generating sequence image at {fig_path}.")
-            env_prop.create_sequence_image(
+            self.env_prop.create_sequence_image(
+                sequence, fig_path, self.problem_name
+            )  # TODO change that assumption, cannot assume this is minigrid env

         return steps

-    def generate_partial_observation(
+    def generate_partial_observation(
+        self,
+        action_selection_method: MethodType,
+        percentage: float,
+        save_fig=False,
+        is_consecutive=True,
+        random_optimalism=True,
+        fig_path=None,
+    ):
         """
         Generate a single observation given a list of agents

@@ -429,25 +481,23 @@ class TabularQLearner(TabularRLAgent):
             list: A list of state-action pairs representing the generated observation.

         Notes:
-            The function randomly selects an agent from the given list and generates a sequence of state-action pairs
-            based on the Q-table of the selected agent. The action selection is stochastic, where each action is
+            The function randomly selects an agent from the given list and generates a sequence of state-action pairs
+            based on the Q-table of the selected agent. The action selection is stochastic, where each action is
             selected based on the probability distribution defined by the Q-values in the Q-table.

-            The generated sequence terminates when a maximum number of steps is reached or when the environment
+            The generated sequence terminates when a maximum number of steps is reached or when the environment
             episode terminates.
         """

-        steps = self.generate_observation(
-
+        steps = self.generate_observation(
+            action_selection_method=action_selection_method,
+            random_optimalism=random_optimalism,
+            save_fig=save_fig,
+            fig_path=fig_path,
+        )  # steps are a full observation
+        result = random_subset_with_order(
+            steps, (int)(percentage * len(steps)), is_consecutive
+        )
         if percentage >= 0.8:
             assert len(result) > 2
         return result
-
-if __name__ == "__main__":
-    from gr_libs.metrics.metrics import greedy_selection
-    import gr_envs # to register everything
-    agent = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-LavaCrossingS9N2-DynamicGoal-1x7-v0")
-    agent.generate_observation(greedy_selection, True, True)
-
-# python experiments.py --recognizer graml --domain point_maze --task L5 --partial_obs_type continuing --point_maze_env obstacles --collect_stats --inference_same_seq_len
-
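`generate_partial_observation` now hands the full trace to `random_subset_with_order`, keeping `int(percentage * len(steps))` state-action pairs either as a consecutive prefix or as a sorted random subset. A self-contained rendition of that truncation (mirroring the helper shown later in this diff rather than importing it):

```python
import random

def random_subset_with_order(sequence, subset_size, is_consecutive=True):
    # mirrors gr_libs.ml.utils.format.random_subset_with_order as it appears in 0.1.8
    if subset_size >= len(sequence):
        return sequence
    if is_consecutive:
        indices = list(range(subset_size))
    else:
        indices = sorted(random.sample(range(len(sequence)), subset_size))
    return [sequence[i] for i in indices]

steps = [(f"s{i}", f"a{i}") for i in range(10)]                       # stand-in for (state, action) pairs
print(random_subset_with_order(steps, int(0.5 * len(steps))))         # first 5 pairs
print(random_subset_with_order(steps, int(0.5 * len(steps)), False))  # 5 random pairs, original order kept
```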
gr_libs/ml/tabular/tabular_rl_agent.py
CHANGED
@@ -15,18 +15,19 @@ class TabularRLAgent(RLAgent):
     recommended as development goes on.
     """

-    def __init__(
-
-
-
-
-
-
-
-
-
-
-
+    def __init__(
+        self,
+        domain_name: str,
+        problem_name: str,
+        episodes: int,
+        decaying_eps: bool,
+        eps: float,
+        alpha: float,
+        decay: float,
+        gamma: float,
+        rand: Random,
+        learning_rate,
+    ):
         super().__init__(
             episodes=episodes,
             decaying_eps=decaying_eps,
@@ -34,7 +35,7 @@ class TabularRLAgent(RLAgent):
             learning_rate=learning_rate,
             gamma=gamma,
             domain_name=domain_name,
-            problem_name=problem_name
+            problem_name=problem_name,
         )
         self.env = gym.make(id=problem_name)
         self.actions = self.env.unwrapped.actions
@@ -87,11 +88,11 @@ class TabularRLAgent(RLAgent):
     @abstractmethod
     def policy(self, state: State) -> Any:
         """The action for the specified state under the currently learned policy
-
-
-
-
-
+        (unlike agent_step, this does not update the policy using state as a sample.
+        Args:
+            state (Any): the state observation from the environment
+        Returns:
+            The action prescribed for that state
         """
         pass

@@ -122,5 +123,5 @@ class TabularRLAgent(RLAgent):

         Returns:
             Any: [description]
-        """""
+        """ ""
         return self.softmax_policy(state)
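The restored docstring spells out the contract: `policy` only reads the learned values, while `agent_step` is the call that updates them. A toy, package-independent sketch of a subclass honoring that split (hypothetical class, for illustration only):

```python
import numpy as np

class GreedyTabularAgent:
    """Toy stand-in for a TabularRLAgent subclass."""

    def __init__(self, n_actions: int):
        self.n_actions = n_actions
        self.q_table = {}

    def policy(self, state) -> int:
        # pure lookup: no learning side effects, as the docstring requires
        q_values = self.q_table.get(str(state), [0.0] * self.n_actions)
        return int(np.argmax(q_values))

    def agent_step(self, reward: float, state) -> int:
        # ...update self.q_table from (reward, state) here, then act...
        return self.policy(state)
```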
gr_libs/ml/utils/__init__.py
CHANGED
@@ -1,6 +1,12 @@
-#from .agent import *
+# from .agent import *
 from .env import make_env
-from .format import
+from .format import (
+    Vocabulary,
+    preprocess_images,
+    preprocess_texts,
+    get_obss_preprocessor,
+    random_subset_with_order,
+)
 from .other import device, seed, synthesize
 from .storage import *
 from .math import softmax
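With the `.format` import now spelled out explicitly, these helpers remain importable from `gr_libs.ml.utils` itself, e.g.:

```python
from gr_libs.ml.utils import (
    Vocabulary,
    get_obss_preprocessor,
    random_subset_with_order,
    softmax,
)
```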
gr_libs/ml/utils/format.py
CHANGED
@@ -5,96 +5,104 @@ import gr_libs.ml
 import gymnasium as gym
 import random

-def get_obss_preprocessor(obs_space):
-    # Check if obs_space is an image space
-    if isinstance(obs_space, gym.spaces.Box):
-        obs_space = {"image": obs_space.shape}

-
-
-
-
+def get_obss_preprocessor(obs_space):
+    # Check if obs_space is an image space
+    if isinstance(obs_space, gym.spaces.Box):
+        obs_space = {"image": obs_space.shape}

-
-
-        obs_space = {"image": obs_space.spaces["image"].shape, "text": 100}
+        def preprocess_obss(obss, device=None):
+            return ml.DictList({"image": preprocess_images(obss, device=device)})

-
+    # Check if it is a MiniGrid observation space
+    elif isinstance(obs_space, gym.spaces.Dict) and "image" in obs_space.spaces.keys():
+        obs_space = {"image": obs_space.spaces["image"].shape, "text": 100}

-
-        return ml.DictList({
-            "image": preprocess_images([obs["image"] for obs in obss], device=device),
-            "text": preprocess_texts([obs["mission"] for obs in obss], vocab, device=device)
-        })
+        vocab = Vocabulary(obs_space["text"])

-
+        def preprocess_obss(obss, device=None):
+            return ml.DictList(
+                {
+                    "image": preprocess_images(
+                        [obs["image"] for obs in obss], device=device
+                    ),
+                    "text": preprocess_texts(
+                        [obs["mission"] for obs in obss], vocab, device=device
+                    ),
+                }
+            )

-
-    elif isinstance(obs_space, gym.spaces.Dict) and "observation" in obs_space.spaces.keys():
-        obs_space = {"observation": obs_space.spaces["observation"].shape}
+        preprocess_obss.vocab = vocab

-
-
-
-
+    # Check if it is a MiniGrid observation space
+    elif (
+        isinstance(obs_space, gym.spaces.Dict)
+        and "observation" in obs_space.spaces.keys()
+    ):
+        obs_space = {"observation": obs_space.spaces["observation"].shape}

+        def preprocess_obss(obss, device=None):
+            return ml.DictList({"observation": preprocess_images(obss, device=device)})

-
-
+    else:
+        raise ValueError("Unknown observation space: " + str(obs_space))

-
+    return obs_space, preprocess_obss


 def preprocess_images(images, device=None):
-
-
-
-
-
-def random_subset_with_order(sequence, subset_size, is_consecutive
-
-
-
-
-
-
-
-
-
+    # Bug of Pytorch: very slow if not first converted to numpy array
+    images = numpy.array(images)
+    return torch.tensor(images, device=device, dtype=torch.float)
+
+
+def random_subset_with_order(sequence, subset_size, is_consecutive=True):
+    if subset_size >= len(sequence):
+        return sequence
+    else:
+        if is_consecutive:
+            indices_to_select = [i for i in range(subset_size)]
+        else:
+            indices_to_select = sorted(
+                random.sample(range(len(sequence)), subset_size)
+            )  # Randomly select indices to keep
+        return [
+            sequence[i] for i in indices_to_select
+        ]  # Return the elements corresponding to the selected indices


 def preprocess_texts(texts, vocab, device=None):
-
-
+    var_indexed_texts = []
+    max_text_len = 0

-
-
-
-
-
+    for text in texts:
+        tokens = re.findall("([a-z]+)", text.lower())
+        var_indexed_text = numpy.array([vocab[token] for token in tokens])
+        var_indexed_texts.append(var_indexed_text)
+        max_text_len = max(len(var_indexed_text), max_text_len)

-
+    indexed_texts = numpy.zeros((len(texts), max_text_len))

-
-
+    for i, indexed_text in enumerate(var_indexed_texts):
+        indexed_texts[i, : len(indexed_text)] = indexed_text

-
+    return torch.tensor(indexed_texts, device=device, dtype=torch.long)


 class Vocabulary:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    """A mapping from tokens to ids with a capacity of `max_size` words.
+    It can be saved in a `vocab.json` file."""
+
+    def __init__(self, max_size):
+        self.max_size = max_size
+        self.vocab = {}
+
+    def load_vocab(self, vocab):
+        self.vocab = vocab
+
+    def __getitem__(self, token):
+        if not token in self.vocab.keys():
+            if len(self.vocab) >= self.max_size:
+                raise ValueError("Maximum vocabulary capacity reached")
+            self.vocab[token] = len(self.vocab) + 1
+        return self.vocab[token]
gr_libs/ml/utils/math.py
CHANGED
@@ -1,6 +1,7 @@
 import math
 from typing import Callable, Generator, List

+
 def softmax(values: List[float]) -> List[float]:
     """Computes softmax probabilities for an array of values
     TODO We should probably use numpy arrays here
@@ -10,4 +11,4 @@ def softmax(values: List[float]) -> List[float]:
     Returns:
         np.array: softmax probabilities
     """
-    return [(math.exp(q)) / sum([math.exp(_q) for _q in values]) for q in values]
+    return [(math.exp(q)) / sum([math.exp(_q) for _q in values]) for q in values]
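A quick numeric check of the softmax above (plain Python, no gr_libs import needed):

```python
import math

def softmax(values):
    # same formula as gr_libs.ml.utils.math.softmax
    return [math.exp(q) / sum(math.exp(_q) for _q in values) for q in values]

probs = softmax([1.0, 2.0, 3.0])
print([round(p, 3) for p in probs])  # [0.09, 0.245, 0.665]
print(round(sum(probs), 10))         # 1.0
```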
gr_libs/ml/utils/other.py
CHANGED