gymcts 1.0.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gymcts/colorful_console_utils.py +26 -3
- gymcts/{gymcts_deterministic_wrapper.py → gymcts_action_history_wrapper.py} +74 -4
- gymcts/gymcts_agent.py +29 -69
- gymcts/{gymcts_naive_wrapper.py → gymcts_deepcopy_wrapper.py} +60 -3
- gymcts/gymcts_distributed_agent.py +299 -0
- gymcts/gymcts_env_abc.py +61 -0
- gymcts/gymcts_node.py +107 -44
- gymcts/gymcts_tree_plotter.py +96 -0
- gymcts/logger.py +1 -4
- {gymcts-1.0.0.dist-info → gymcts-1.2.1.dist-info}/METADATA +54 -56
- gymcts-1.2.1.dist-info/RECORD +15 -0
- {gymcts-1.0.0.dist-info → gymcts-1.2.1.dist-info}/WHEEL +1 -1
- gymcts/gymcts_gym_env.py +0 -28
- gymcts-1.0.0.dist-info/RECORD +0 -13
- {gymcts-1.0.0.dist-info → gymcts-1.2.1.dist-info/licenses}/LICENSE +0 -0
- {gymcts-1.0.0.dist-info → gymcts-1.2.1.dist-info}/top_level.txt +0 -0
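
The class and module renames below recur throughout the diffs. As a quick orientation, a sketch of the 1.2.1 import paths — module and class names are taken from the hunks below; verify against the installed package:

# gymcts 1.0.0 -> 1.2.1 rename map (inferred from the diffs below)
# SoloMCTSGymEnv (gymcts_gym_env)        -> GymctsABC (gymcts_env_abc)
# SoloMCTSAgent                          -> GymctsAgent (gymcts_agent)
# NaiveSoloMCTSGymEnvWrapper             -> DeepCopyMCTSGymEnvWrapper (gymcts_deepcopy_wrapper)
# DeterministicSoloMCTSGymEnvWrapper     -> ActionHistoryMCTSGymEnvWrapper (gymcts_action_history_wrapper)
from gymcts.gymcts_env_abc import GymctsABC
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper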
gymcts/colorful_console_utils.py
CHANGED
@@ -1,3 +1,5 @@
+from typing import Any
+
 import matplotlib.pyplot as plt
 import numpy as np
 
@@ -103,8 +105,19 @@ def wrap_with_color_codes(s: object, /, r: int | float, g: int | float, b: int |
         f"{CEND}"
 
 
-def wrap_evenly_spaced_color(s: str, n_of_item:int, n_classes:int, c_map="rainbow") -> str:
-
+def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rainbow") -> str:
+    """
+    Wraps a string with a color scale (a matplotlib c_map) based on the n_of_item and n_classes.
+    This function is used to color code the available actions in the MCTS tree visualisation.
+    The children of the MCTS tree are colored based on their action for a clearer visualisation.
+
+    :param s: the string (or object) to be wrapped. objects are converted to string (using the __str__ function).
+    :param n_of_item: the index of the item to be colored. In a mcts tree, this is the (parent-)action of the node.
+    :param n_classes: the number of classes (or items) to be colored. In a mcts tree, this is the number of available actions.
+    :param c_map: the colormap to be used (default is 'rainbow').
+        The colormap can be any matplotlib colormap, e.g. 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
+    :return: a string that contains the color-codes (prefix and suffix) and the string s in between.
+    """
     if s is None or n_of_item is None or n_classes is None:
         return s
 
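
The docstring added above pins down the signature, so a minimal usage sketch is possible (standalone; the MCTS tree context is not required):

from gymcts.colorful_console_utils import wrap_evenly_spaced_color

# color each action label with an evenly spaced color from the 'rainbow' colormap
n_actions = 4
for action in range(n_actions):
    print(wrap_evenly_spaced_color(f"action {action}", n_of_item=action, n_classes=n_actions))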
@@ -117,7 +130,17 @@ def wrap_evenly_spaced_color(s: str, n_of_item:int, n_classes:int, c_map="rainbo
     return f"{color_asni}{s}{CEND}"
 
 
-def wrap_with_color_scale(s: str, value: float, min_val:float, max_val:float, c_map=None) -> str:
+def wrap_with_color_scale(s: str, value: float, min_val: float, max_val: float, c_map=None) -> str:
+    """
+    Wraps a string with a color scale (a matplotlib c_map) based on the value, min_val, and max_val.
+
+    :param s: the string to be wrapped
+    :param value: the value to be mapped to a color
+    :param min_val: the minimum value of the scale
+    :param max_val: the maximum value of the scale
+    :param c_map: the colormap to be used (default is 'rainbow')
+    :return:
+    """
    if s is None or min_val is None or max_val is None or min_val >= max_val:
        return s
 
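
Likewise, a minimal sketch for the newly documented wrap_with_color_scale (per the guard in the body above, a missing or inverted min_val/max_val returns s unchanged):

from gymcts.colorful_console_utils import wrap_with_color_scale

# map node values in [min_val, max_val] onto a colormap
for value in (0.0, 0.5, 1.0):
    print(wrap_with_color_scale(f"value={value}", value=value, min_val=0.0, max_val=1.0))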
gymcts/{gymcts_deterministic_wrapper.py → gymcts_action_history_wrapper.py}
RENAMED
@@ -1,18 +1,32 @@
 import random
-import copy
 
 import numpy as np
-from typing import
+from typing import Any, SupportsFloat, Callable
 import gymnasium as gym
 from gymnasium.core import WrapperActType, WrapperObsType
 from gymnasium.wrappers import RecordEpisodeStatistics
 
-from gymcts.gymcts_gym_env import SoloMCTSGymEnv
+from gymcts.gymcts_env_abc import GymctsABC
 
 from gymcts.logger import log
 
 
-class DeterministicSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
+class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
+    """
+    A wrapper for gym environments that implements the GymctsABC interface.
+    It uses the action history as state representation.
+    Please note that this is not the most efficient way to implement the state representation.
+    It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+    If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+    The action history is a list of actions taken in the environment.
+    The state is represented as a list of actions taken in the environment.
+    The state is used to restore the environment using the load_state method.
+
+    It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+    If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+    """
+
+    # helper attributes for the wrapper
     _terminal_flag: bool = False
     _last_reward: SupportsFloat = 0
     _step_tuple: tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]] = None
@@ -25,6 +39,17 @@ class DeterministicSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
             action_mask_fn: str | Callable[[gym.Env], np.ndarray] | None = None,
             buffer_length: int = 100,
     ):
+        """
+        A wrapper for gym environments that implements the GymctsABC interface.
+        It uses the action history as state representation.
+        Please note that this is not the most efficient way to implement the state representation.
+        It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+        If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+
+        :param env: the environment to wrap
+        :param action_mask_fn: a function that takes the environment as input and returns a mask of valid actions
+        :param buffer_length: the length of the buffer for recording episodes for determining their rollout returns
+        """
         # wrap with RecordEpisodeStatistics if it is not already wrapped
         env = RecordEpisodeStatistics(env, buffer_length=buffer_length)
 
@@ -48,6 +73,17 @@ class DeterministicSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
         self._action_mask_fn = action_mask_fn
 
     def load_state(self, state: list[int]) -> None:
+        """
+        Loads the state of the environment. The state is a list of actions taken in the environment.
+
+        The environment is reset and all actions in the state are performed in order to restore the environment to the
+        same state.
+
+        This works only for deterministic environments!
+
+        :param state: the state to load
+        :return: None
+        """
         self.env.reset()
         self._wrapper_action_history = []
 
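
The reset-and-replay mechanism documented in load_state above is the wrapper's whole state model. A standalone sketch of the same idea — not the library code itself — using a deterministic FrozenLake as an illustrative stand-in environment:

import gymnasium as gym

def restore_by_replay(env: gym.Env, action_history: list[int]) -> None:
    # reset, then replay every recorded action in order;
    # only valid if the environment is deterministic
    env.reset()
    for action in action_history:
        env.step(action)

env = gym.make("FrozenLake-v1", is_slippery=False)  # deterministic variant (illustrative)
env.reset()
env.step(2)
restore_by_replay(env, [2])  # env is now back in the state reached after taking action 2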
@@ -56,15 +92,30 @@ class DeterministicSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
         self._wrapper_action_history.append(action)
 
     def is_terminal(self) -> bool:
+        """
+        Returns True if the environment is in a terminal state, False otherwise.
+
+        :return:
+        """
         if not len(self.get_valid_actions()):
             return True
         else:
             return self._terminal_flag
 
     def action_masks(self) -> np.ndarray | None:
+        """
+        Returns the action masks for the environment. If the action_mask_fn is not set, it returns None.
+
+        :return:
+        """
         return self._action_mask_fn(self.env) if self._action_mask_fn is not None else None
 
     def get_valid_actions(self) -> list[int]:
+        """
+        Returns a list of valid actions for the current state of the environment.
+
+        :return: a list of valid actions
+        """
         if self._action_mask_fn is None:
             action_space: gym.spaces.Discrete = self.env.action_space  # Type hinting
             return list(range(action_space.n))
@@ -72,6 +123,12 @@ class DeterministicSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
             return [i for i, mask in enumerate(self.action_masks()) if mask]
 
     def rollout(self) -> float:
+        """
+        Performs a random rollout from the current state of the environment and returns the return (sum of rewards)
+        of the rollout.
+
+        :return: the return of the rollout
+        """
         log.debug("performing rollout")
         # random rollout
         # perform random valid action util terminal
@@ -92,11 +149,24 @@ class DeterministicSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
         return episode_return
 
     def get_state(self) -> list[int]:
+        """
+        Returns the current state of the environment. The state is a list of actions taken in the environment,
+        namely all action that have been taken in the environment so far (since the last reset).
+
+        :return: a list of actions taken in the environment
+        """
+
         return self._wrapper_action_history.copy()
 
     def step(
             self, action: WrapperActType
     ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
+        """
+        Performs a step in the environment. It adds the action to the action history and updates the terminal flag.
+
+        :param action: action to perform in the environment
+        :return: the step tuple of the environment (obs, reward, terminated, truncated, info)
+        """
         step_tuple = self.env.step(action)
         self._wrapper_action_history.append(action)
         obs, reward, terminated, truncated, info = step_tuple
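
Taken together, these hunks document the full GymctsABC surface of the wrapper (load_state, is_terminal, action_masks, get_valid_actions, rollout, get_state, step). A hedged end-to-end sketch; the environment choice and call order are illustrative assumptions, not taken from this diff:

import gymnasium as gym
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper

env = ActionHistoryMCTSGymEnvWrapper(gym.make("FrozenLake-v1", is_slippery=False))
env.reset()
env.step(env.get_valid_actions()[0])
state = env.get_state()   # e.g. [0] -- the action history since reset
env.load_state(state)     # reset + replay restores the same state
print(env.is_terminal())
print(env.rollout())      # random rollout; returns the episode return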
gymcts/gymcts_agent.py
CHANGED
@@ -1,29 +1,31 @@
 import copy
+import random
 import gymnasium as gym
 
 from typing import TypeVar, Any, SupportsFloat, Callable
 
-from gymcts.
-from gymcts.
-from gymcts.gymcts_node import SoloMCTSNode
+from gymcts.gymcts_env_abc import GymctsABC
+from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
+from gymcts.gymcts_node import GymctsNode
+from gymcts.gymcts_tree_plotter import _generate_mcts_tree
 
 from gymcts.logger import log
 
 TSoloMCTSNode = TypeVar("TSoloMCTSNode", bound="SoloMCTSNode")
 
 
-class SoloMCTSAgent:
+class GymctsAgent:
     render_tree_after_step: bool = False
     render_tree_max_depth: int = 2
     exclude_unvisited_nodes_from_render: bool = False
     number_of_simulations_per_step: int = 25
 
-    env:
-    search_root_node:
+    env: GymctsABC
+    search_root_node: GymctsNode  # NOTE: this is not the same as the root of the tree!
     clear_mcts_tree_after_step: bool
 
     def __init__(self,
-                 env:
+                 env: GymctsABC,
                  clear_mcts_tree_after_step: bool = True,
                  render_tree_after_step: bool = False,
                  render_tree_max_depth: int = 2,
@@ -43,13 +45,13 @@ class SoloMCTSAgent:
         self.env = env
         self.clear_mcts_tree_after_step = clear_mcts_tree_after_step
 
-        self.search_root_node =
+        self.search_root_node = GymctsNode(
             action=None,
             parent=None,
             env_reference=env,
         )
 
-    def navigate_to_leaf(self, from_node:
+    def navigate_to_leaf(self, from_node: GymctsNode) -> GymctsNode:
         log.debug(f"Navigate to leaf. from_node: {from_node}")
         if from_node.terminal:
             log.debug("Node is terminal. Returning from_node")
@@ -62,11 +64,14 @@ class SoloMCTSAgent:
         # NAVIGATION STRATEGY
         # select child with highest UCB score
         while not temp_node.is_leaf():
-
+            children = list(temp_node.children.values())
+            max_ucb_score = max(child.ucb_score() for child in children)
+            best_children = [child for child in children if child.ucb_score() == max_ucb_score]
+            temp_node = random.choice(best_children)
         log.debug(f"Selected leaf node: {temp_node}")
         return temp_node
 
-    def expand_node(self, node:
+    def expand_node(self, node: GymctsNode) -> None:
         log.debug(f"expanding node: {node}")
         # EXPANSION STRATEGY
         # expand all children
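
The new selection loop breaks UCB ties uniformly at random rather than always taking the first maximal child, which max() alone would do. ucb_score() itself is not shown in this diff; for reference, a standard UCB1 form plus the tie-aware selection pattern — the constant and formula are assumptions, not the package's exact implementation:

import math
import random

def ucb1(mean_value: float, visit_count: int, parent_visits: int, c: float = math.sqrt(2)) -> float:
    # unvisited children get +inf so they are always explored first
    if visit_count == 0:
        return math.inf
    return mean_value + c * math.sqrt(math.log(parent_visits) / visit_count)

# tie-aware selection, mirroring the loop added in this hunk
scores = {"a": 1.0, "b": 1.0, "c": 0.2}
best = max(scores.values())
print(random.choice([k for k, v in scores.items() if v == best]))  # 'a' or 'b'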
@@ -78,7 +83,7 @@ class SoloMCTSAgent:
             self._load_state(node)
 
             obs, reward, terminal, truncated, _ = self.env.step(action)
-            child_dict[action] =
+            child_dict[action] = GymctsNode(
                 action=action,
                 parent=node,
                 env_reference=self.env,
@@ -110,14 +115,14 @@ class SoloMCTSAgent:
         # restore state of current node
         return action_list
 
-    def _load_state(self, node:
-        if isinstance(self.env,
+    def _load_state(self, node: GymctsNode) -> None:
+        if isinstance(self.env, DeepCopyMCTSGymEnvWrapper):
             self.env = copy.deepcopy(node.state)
         else:
             self.env.load_state(node.state)
 
-    def perform_mcts_step(self, search_start_node:
-                          render_tree_after_step: bool = None) -> tuple[int,
+    def perform_mcts_step(self, search_start_node: GymctsNode = None, num_simulations: int = None,
+                          render_tree_after_step: bool = None) -> tuple[int, GymctsNode]:
 
         if render_tree_after_step is None:
             render_tree_after_step = self.render_tree_after_step
@@ -149,7 +154,7 @@ class SoloMCTSAgent:
 
         return action, next_node
 
-    def vanilla_mcts_search(self, search_start_node:
+    def vanilla_mcts_search(self, search_start_node: GymctsNode = None, num_simulations=10) -> int:
         log.debug(f"performing one MCTS search step with {num_simulations} simulations")
         if search_start_node is None:
             search_start_node = self.search_root_node
@@ -178,7 +183,7 @@ class SoloMCTSAgent:
 
         return search_start_node.get_best_action()
 
-    def show_mcts_tree(self, start_node:
+    def show_mcts_tree(self, start_node: GymctsNode = None, tree_max_depth: int = None) -> None:
 
         if start_node is None:
             start_node = self.search_root_node
@@ -187,13 +192,17 @@ class SoloMCTSAgent:
             tree_max_depth = self.render_tree_max_depth
 
         print(start_node.__str__(colored=True, action_space_n=self.env.action_space.n))
-        for line in
+        for line in _generate_mcts_tree(
+                start_node=start_node,
+                depth=tree_max_depth,
+                action_space_n=self.env.action_space.n,
+        ):
             print(line)
 
     def show_mcts_tree_from_root(self, tree_max_depth: int = None) -> None:
         self.show_mcts_tree(start_node=self.search_root_node.get_root(), tree_max_depth=tree_max_depth)
 
-    def backpropagation(self, node:
+    def backpropagation(self, node: GymctsNode, episode_return: float) -> None:
         log.debug(f"performing backpropagation from leaf node: {node}")
         while not node.is_root():
             # node.mean_value = ((node.mean_value * node.visit_count) + episode_return) / (node.visit_count + 1)
@@ -209,53 +218,4 @@ class SoloMCTSAgent:
             node.max_value = max(node.max_value, episode_return)
             node.min_value = min(node.min_value, episode_return)
 
-    def _generate_mcts_tree(self, start_node: SoloMCTSNode = None, prefix: str = None, depth: int = None) -> list[str]:
 
-        if prefix is None:
-            prefix = ""
-        import gymcts.colorful_console_utils as ccu
-
-        if start_node is None:
-            start_node = self.search_root_node
-
-        # prefix components:
-        space = '    '
-        branch = '│   '
-        # pointers:
-        tee = '├── '
-        last = '└── '
-
-        contents = start_node.children.values() if start_node.children is not None else []
-        if self.exclude_unvisited_nodes_from_render:
-            contents = [node for node in contents if node.visit_count > 0]
-        # contents each get pointers that are ├── with a final └── :
-        # pointers = [tee] * (len(contents) - 1) + [last]
-        pointers = [tee for _ in range(len(contents) - 1)] + [last]
-
-        for pointer, current_node in zip(pointers, contents):
-            n_item = current_node.parent.action if current_node.parent is not None else 0
-            n_classes = self.env.action_space.n
-
-            pointer = ccu.wrap_evenly_spaced_color(
-                s=pointer,
-                n_of_item=n_item,
-                n_classes=n_classes,
-            )
-
-            yield prefix + pointer + f"{current_node.__str__(colored=True, action_space_n=n_classes)}"
-            if current_node.children and len(current_node.children):  # extend the prefix and recurse:
-                # extension = branch if pointer == tee else space
-                extension = branch if tee in pointer else space
-                # i.e. space because last, └── , above so no more |
-                extension = ccu.wrap_evenly_spaced_color(
-                    s=extension,
-                    n_of_item=n_item,
-                    n_classes=n_classes,
-                )
-                if depth is not None and depth <= 0:
-                    continue
-                yield from self._generate_mcts_tree(
-                    current_node,
-                    prefix=prefix + extension,
-                    depth=depth - 1 if depth is not None else None
-                )
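
With the tree renderer moved out to gymcts/gymcts_tree_plotter.py (+96 lines in the summary), the agent is reduced to the search API. A hedged usage sketch built only from signatures visible in this diff; the environment and call order are illustrative assumptions:

import gymnasium as gym
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper

env = DeepCopyMCTSGymEnvWrapper(gym.make("FrozenLake-v1"))
env.reset()

agent = GymctsAgent(env, clear_mcts_tree_after_step=True)
best_first_action = agent.vanilla_mcts_search(num_simulations=50)  # returns get_best_action()
agent.show_mcts_tree(tree_max_depth=2)  # rendering now delegates to gymcts_tree_plotter
print(best_first_action)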
gymcts/{gymcts_naive_wrapper.py → gymcts_deepcopy_wrapper.py}
RENAMED
@@ -7,14 +7,21 @@ import gymnasium as gym
 from gymnasium.core import WrapperActType, WrapperObsType
 from gymnasium.wrappers import RecordEpisodeStatistics
 
-from gymcts.gymcts_gym_env import SoloMCTSGymEnv
+from gymcts.gymcts_env_abc import GymctsABC
 
 from gymcts.logger import log
 
 
-class NaiveSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
-
+class DeepCopyMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
+    """
+    A wrapper for gym environments that implements the GymctsABC interface.
+    It uses deepcopys as state representation.
+    Please note that this is not the most efficient way to implement the state representation.
+    It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+    If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+    """
 
+    # helper attributes for the wrapper
     _terminal_flag:bool = False
     _last_reward: SupportsFloat = 0
     _step_tuple: tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]] = None
@@ -22,9 +29,21 @@ class NaiveSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
     _action_mask_fn: Callable[[gym.Env], np.ndarray] | None = None
 
     def is_terminal(self) -> bool:
+        """
+        Returns True if the environment is in a terminal state, False otherwise.
+
+        :return: True if the environment is in a terminal state, False otherwise.
+        """
         return self._terminal_flag
 
     def load_state(self, state: Any) -> None:
+        """
+        The load_state method is not implemented. The state is loaded by replacing the env with the 'state' (the copy
+        provided my 'get_state'). 'self' in a method cannot be replaced with another object (as far as i know).
+
+        :param state: a deepcopy of the environment
+        :return: None
+        """
         msg = """
         The NaiveSoloMCTSGymEnvWrapper uses deepcopies of the entire env as the state.
         The loading of the state is done by replacing the env with the 'state' (the copy provided my 'get_state').
@@ -39,6 +58,16 @@ class NaiveSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
             buffer_length: int = 100,
             record_video: bool = False,
     ):
+        """
+        The constructor of the wrapper. It wraps the environment with RecordEpisodeStatistics and checks if the action
+        space is discrete. It also checks if the action_mask_fn is a string or a callable. If it is a string, it tries to
+        find the method in the environment. If it is a callable, it assigns it to the _action_mask_fn attribute.
+
+        :param env: the environment to wrap
+        :param action_mask_fn:
+        :param buffer_length:
+        :param record_video:
+        """
         # wrap with RecordEpisodeStatistics if it is not already wrapped
         env = RecordEpisodeStatistics(env, buffer_length=buffer_length)
 
@@ -61,6 +90,10 @@ class NaiveSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
         self._action_mask_fn = action_mask_fn
 
     def get_state(self) -> Any:
+        """
+        Returns the current state of the environment as a deepcopy of the environment.
+        :return: a deepcopy of the environment
+        """
         log.debug("getting state")
         original_state = self
         copied_state = copy.deepcopy(self)
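
The get_state/load_state asymmetry documented above (get_state returns a deepcopy; "loading" actually happens in GymctsAgent._load_state by replacing the agent's env reference) can be illustrated standalone:

import copy

class Counter:
    def __init__(self) -> None:
        self.t = 0
    def step(self) -> None:
        self.t += 1

env = Counter()
env.step()
snapshot = copy.deepcopy(env)   # get_state(): a full deep copy of the env
env.step()
env = snapshot                  # "load_state": replace the reference, as the agent does
print(env.t)                    # 1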
@@ -71,9 +104,19 @@ class NaiveSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
         return copied_state
 
     def action_masks(self) -> np.ndarray | None:
+        """
+        Returns the action masks for the environment. If the action_mask_fn is not set, it returns None.
+        :return: the action masks for the environment
+        """
         return self._action_mask_fn(self.env) if self._action_mask_fn is not None else None
 
     def get_valid_actions(self) -> list[int]:
+        """
+        Returns a list of valid actions for the current state of the environment.
+        This used to obtain potential actions/subsequent sates for the MCTS tree.
+
+        :return: the list of valid actions
+        """
         if self._action_mask_fn is None:
             action_space: gym.spaces.Discrete = self.env.action_space  # Type hinting
             return list(range(action_space.n))
@@ -83,6 +126,14 @@ class NaiveSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
     def step(
             self, action: WrapperActType
     ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
+        """
+        Performs a step in the environment.
+        This method is used to update the wrapper with the new state and the new action, to realize the terminal state
+        functionality.
+
+        :param action: action to perform in the environment
+        :return: the step tuple of the environment (obs, reward, terminated, truncated, info)
+        """
         step_tuple = self.env.step(action)
 
         obs, reward, terminated, truncated, info = step_tuple
@@ -93,6 +144,12 @@ class NaiveSoloMCTSGymEnvWrapper(SoloMCTSGymEnv, gym.Wrapper):
 
 
     def rollout(self) -> float:
+        """
+        Performs a rollout from the current state of the environment and returns the return (sum of rewards) of the
+        rollout.
+
+        :return: the return of the rollout
+        """
         log.debug("performing rollout")
         # random rollout
         # perform random valid action util terminal