gymcts 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gymcts/colorful_console_utils.py CHANGED
@@ -106,6 +106,18 @@ def wrap_with_color_codes(s: object, /, r: int | float, g: int | float, b: int |
 
 
  def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rainbow") -> str:
+ """
+ Wraps a string with a color scale (a matplotlib c_map) based on n_of_item and n_classes.
+ This function is used to color code the available actions in the MCTS tree visualisation.
+ The children of the MCTS tree are colored based on their action for a clearer visualisation.
+
+ :param s: the string (or object) to be wrapped. Objects are converted to strings (using their __str__ method).
+ :param n_of_item: the index of the item to be colored. In an MCTS tree, this is the (parent-)action of the node.
+ :param n_classes: the number of classes (or items) to be colored. In an MCTS tree, this is the number of available actions.
+ :param c_map: the colormap to be used (default is 'rainbow').
+ The colormap can be any matplotlib colormap, e.g. 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
+ :return: a string that contains the color codes (prefix and suffix) with the string s in between.
+ """
  if s is None or n_of_item is None or n_classes is None:
  return s
 
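The pattern this helper documents is sampling a matplotlib colormap at evenly spaced positions and emitting 24-bit ANSI escape codes. A minimal sketch of the idea, assuming matplotlib is installed (illustrative only, not the package's exact code):

```python
import matplotlib

def wrap_evenly_spaced_color_sketch(s, n_of_item: int, n_classes: int, c_map: str = "rainbow") -> str:
    # Sample the colormap at one of n_classes evenly spaced positions in [0, 1].
    r, g, b, _ = matplotlib.colormaps[c_map](n_of_item / max(n_classes - 1, 1))
    # 24-bit ANSI escape codes: the prefix sets the foreground color, the suffix resets it.
    return f"\033[38;2;{int(r * 255)};{int(g * 255)};{int(b * 255)}m{s}\033[0m"
```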
@@ -119,6 +131,16 @@ def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rain
 
 
  def wrap_with_color_scale(s: str, value: float, min_val: float, max_val: float, c_map=None) -> str:
+ """
+ Wraps a string with a color scale (a matplotlib c_map) based on the value, min_val, and max_val.
+
+ :param s: the string to be wrapped
+ :param value: the value to be mapped to a color
+ :param min_val: the minimum value of the scale
+ :param max_val: the maximum value of the scale
+ :param c_map: the colormap to be used (default is 'rainbow')
+ :return: a string that contains the color codes (prefix and suffix) with the string s in between
+ """
  if s is None or min_val is None or max_val is None or min_val >= max_val:
  return s
 
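The value-based variant differs from the evenly spaced one only in how the colormap position is chosen: the value is first normalized into [0, 1]. A hedged sketch of that mapping, under the same matplotlib assumption:

```python
import matplotlib

def wrap_with_color_scale_sketch(s: str, value: float, min_val: float, max_val: float,
                                 c_map: str = "rainbow") -> str:
    # Normalize the value into [0, 1], then sample the colormap at that position.
    t = (value - min_val) / (max_val - min_val)
    r, g, b, _ = matplotlib.colormaps[c_map](t)
    return f"\033[38;2;{int(r * 255)};{int(g * 255)};{int(b * 255)}m{s}\033[0m"
```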
gymcts/gymcts_action_history_wrapper.py CHANGED
@@ -1,8 +1,7 @@
  import random
- import copy
 
  import numpy as np
- from typing import TypeVar, Any, SupportsFloat, Callable
+ from typing import Any, SupportsFloat, Callable
  import gymnasium as gym
  from gymnasium.core import WrapperActType, WrapperObsType
  from gymnasium.wrappers import RecordEpisodeStatistics
@@ -13,6 +12,21 @@ from gymcts.logger import log
 
 
  class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
+ """
+ A wrapper for gym environments that implements the GymctsABC interface.
+ It uses the action history as the state representation: the state is the list of actions
+ taken in the environment since the last reset, and the environment is restored via the
+ load_state method by replaying them.
+
+ Please note that this is not the most efficient way to implement the state representation.
+ It is meant to check whether your use case works well with the MCTS algorithm.
+ If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+ """
+
+ # helper attributes for the wrapper
  _terminal_flag: bool = False
  _last_reward: SupportsFloat = 0
  _step_tuple: tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]] = None
@@ -25,6 +39,17 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  action_mask_fn: str | Callable[[gym.Env], np.ndarray] | None = None,
  buffer_length: int = 100,
  ):
+ """
+ A wrapper for gym environments that implements the GymctsABC interface.
+ It uses the action history as the state representation.
+ Please note that this is not the most efficient way to implement the state representation.
+ It is meant to check whether your use case works well with the MCTS algorithm.
+ If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+
+ :param env: the environment to wrap
+ :param action_mask_fn: a function that takes the environment as input and returns a mask of valid actions
+ :param buffer_length: the length of the buffer for recording episodes for determining their rollout returns
+ """
  # wrap with RecordEpisodeStatistics if it is not already wrapped
  env = RecordEpisodeStatistics(env, buffer_length=buffer_length)
 
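A hedged usage sketch of the constructor parameters documented above. The all-valid mask function is purely illustrative; a real action_mask_fn would inspect the environment state:

```python
import gymnasium as gym
import numpy as np
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper

def all_actions_valid(e: gym.Env) -> np.ndarray:
    # Placeholder mask that marks every action as valid.
    return np.ones(e.action_space.n, dtype=bool)

env = gym.make("FrozenLake-v1", is_slippery=False)
env = ActionHistoryMCTSGymEnvWrapper(env, action_mask_fn=all_actions_valid, buffer_length=100)
```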
@@ -48,6 +73,17 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  self._action_mask_fn = action_mask_fn
 
  def load_state(self, state: list[int]) -> None:
+ """
+ Loads the state of the environment. The state is a list of actions taken in the environment.
+
+ The environment is reset and all actions in the state are performed in order to restore the environment to the
+ same state.
+
+ This works only for deterministic environments!
+
+ :param state: the state to load
+ :return: None
+ """
  self.env.reset()
  self._wrapper_action_history = []
 
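The replay-based restore described in the docstring boils down to a reset followed by re-stepping. A minimal sketch of the pattern (not the package's exact code; deterministic environments only):

```python
import gymnasium as gym

def load_state_by_replay(env: gym.Env, state: list[int]) -> None:
    # Replaying the same actions from a fresh reset reproduces the same
    # environment state, provided the dynamics are deterministic.
    env.reset()
    for action in state:
        env.step(action)
```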
@@ -56,15 +92,30 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  self._wrapper_action_history.append(action)
 
  def is_terminal(self) -> bool:
+ """
+ Returns True if the environment is in a terminal state, False otherwise.
+
+ :return: True if the environment is in a terminal state, False otherwise
+ """
  if not len(self.get_valid_actions()):
  return True
  else:
  return self._terminal_flag
 
  def action_masks(self) -> np.ndarray | None:
+ """
+ Returns the action masks for the environment. If the action_mask_fn is not set, it returns None.
+
+ :return: the action masks, or None if no action_mask_fn is set
+ """
  return self._action_mask_fn(self.env) if self._action_mask_fn is not None else None
 
  def get_valid_actions(self) -> list[int]:
+ """
+ Returns a list of valid actions for the current state of the environment.
+
+ :return: a list of valid actions
+ """
  if self._action_mask_fn is None:
  action_space: gym.spaces.Discrete = self.env.action_space # Type hinting
  return list(range(action_space.n))
@@ -72,6 +123,12 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  return [i for i, mask in enumerate(self.action_masks()) if mask]
 
  def rollout(self) -> float:
+ """
+ Performs a random rollout from the current state of the environment and returns the return (sum of rewards)
+ of the rollout.
+
+ :return: the return of the rollout
+ """
  log.debug("performing rollout")
  # random rollout
  # perform random valid actions until terminal
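The random rollout amounts to sampling valid actions until the episode ends and summing the rewards. A minimal sketch of that loop, assuming only the interface documented in this diff (is_terminal, get_valid_actions, step):

```python
import random

def random_rollout(env) -> float:
    # Take uniformly random valid actions until a terminal state is reached,
    # accumulating rewards into the episode return.
    episode_return = 0.0
    while not env.is_terminal():
        action = random.choice(env.get_valid_actions())
        _, reward, terminated, truncated, _ = env.step(action)
        episode_return += float(reward)
        if terminated or truncated:
            break
    return episode_return
```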
@@ -92,11 +149,24 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  return episode_return
 
  def get_state(self) -> list[int]:
+ """
+ Returns the current state of the environment. The state is a list of actions taken in the environment,
+ namely all actions that have been taken in the environment so far (since the last reset).
+
+ :return: a list of actions taken in the environment
+ """
+
  return self._wrapper_action_history.copy()
 
  def step(
  self, action: WrapperActType
  ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
+ """
+ Performs a step in the environment. It adds the action to the action history and updates the terminal flag.
+
+ :param action: action to perform in the environment
+ :return: the step tuple of the environment (obs, reward, terminated, truncated, info)
+ """
  step_tuple = self.env.step(action)
  self._wrapper_action_history.append(action)
  obs, reward, terminated, truncated, info = step_tuple
gymcts/gymcts_agent.py CHANGED
@@ -1,4 +1,5 @@
  import copy
+ import random
  import gymnasium as gym
 
  from typing import TypeVar, Any, SupportsFloat, Callable
@@ -63,7 +64,10 @@ class GymctsAgent:
  # NAVIGATION STRATEGY
  # select child with highest UCB score
  while not temp_node.is_leaf():
- temp_node = max(temp_node.children.values(), key=lambda child: child.ucb_score())
+ children = list(temp_node.children.values())
+ max_ucb_score = max(child.ucb_score() for child in children)
+ best_children = [child for child in children if child.ucb_score() == max_ucb_score]
+ temp_node = random.choice(best_children)
  log.debug(f"Selected leaf node: {temp_node}")
  return temp_node
 
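Selecting uniformly among all children that attain the maximum UCB score avoids a systematic bias toward whichever child happens to come first in iteration order. The same pattern in isolation (a generic sketch, not part of the package):

```python
import random
from typing import Callable, Sequence, TypeVar

T = TypeVar("T")

def argmax_random_tiebreak(items: Sequence[T], key: Callable[[T], float]) -> T:
    # Return one of the items attaining the maximum key, chosen uniformly at random.
    best = max(key(item) for item in items)
    return random.choice([item for item in items if key(item) == best])
```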
gymcts/gymcts_deepcopy_wrapper.py CHANGED
@@ -13,8 +13,15 @@ from gymcts.logger import log
 
 
  class DeepCopyMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
-
-
+ """
+ A wrapper for gym environments that implements the GymctsABC interface.
+ It uses deepcopies as the state representation.
+ Please note that this is not the most efficient way to implement the state representation.
+ It is meant to check whether your use case works well with the MCTS algorithm.
+ If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+ """
+
+ # helper attributes for the wrapper
  _terminal_flag: bool = False
  _last_reward: SupportsFloat = 0
  _step_tuple: tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]] = None
@@ -22,9 +29,21 @@ class DeepCopyMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  _action_mask_fn: Callable[[gym.Env], np.ndarray] | None = None
 
  def is_terminal(self) -> bool:
+ """
+ Returns True if the environment is in a terminal state, False otherwise.
+
+ :return: True if the environment is in a terminal state, False otherwise.
+ """
  return self._terminal_flag
 
  def load_state(self, state: Any) -> None:
+ """
+ The load_state method is not implemented. The state is loaded by replacing the env with the 'state' (the copy
+ provided by 'get_state'). 'self' in a method cannot be replaced with another object (as far as I know).
+
+ :param state: a deepcopy of the environment
+ :return: None
+ """
  msg = """
  The NaiveSoloMCTSGymEnvWrapper uses deepcopies of the entire env as the state.
  The loading of the state is done by replacing the env with the 'state' (the copy provided by 'get_state').
@@ -39,6 +58,16 @@ class DeepCopyMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  buffer_length: int = 100,
  record_video: bool = False,
  ):
+ """
+ The constructor of the wrapper. It wraps the environment with RecordEpisodeStatistics and checks if the action
+ space is discrete. It also checks if the action_mask_fn is a string or a callable. If it is a string, it tries to
+ find the method in the environment. If it is a callable, it assigns it to the _action_mask_fn attribute.
+
+ :param env: the environment to wrap
+ :param action_mask_fn: a function (or the name of an env method) that returns a mask of valid actions
+ :param buffer_length: the length of the buffer for recording episode statistics
+ :param record_video: whether to record a video of the episodes
+ """
  # wrap with RecordEpisodeStatistics if it is not already wrapped
  env = RecordEpisodeStatistics(env, buffer_length=buffer_length)
 
@@ -61,6 +90,10 @@ class DeepCopyMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  self._action_mask_fn = action_mask_fn
 
  def get_state(self) -> Any:
+ """
+ Returns the current state of the environment as a deepcopy of the environment.
+ :return: a deepcopy of the environment
+ """
  log.debug("getting state")
  original_state = self
  copied_state = copy.deepcopy(self)
@@ -71,9 +104,19 @@ class DeepCopyMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  return copied_state
 
  def action_masks(self) -> np.ndarray | None:
+ """
+ Returns the action masks for the environment. If the action_mask_fn is not set, it returns None.
+ :return: the action masks for the environment
+ """
  return self._action_mask_fn(self.env) if self._action_mask_fn is not None else None
 
  def get_valid_actions(self) -> list[int]:
+ """
+ Returns a list of valid actions for the current state of the environment.
+ This is used to obtain potential actions/subsequent states for the MCTS tree.
+
+ :return: the list of valid actions
+ """
  if self._action_mask_fn is None:
  action_space: gym.spaces.Discrete = self.env.action_space # Type hinting
  return list(range(action_space.n))
@@ -83,6 +126,14 @@ class DeepCopyMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
  def step(
  self, action: WrapperActType
  ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
+ """
+ Performs a step in the environment.
+ This method is used to update the wrapper with the new state and the new action, to realize the terminal state
+ functionality.
+
+ :param action: action to perform in the environment
+ :return: the step tuple of the environment (obs, reward, terminated, truncated, info)
+ """
  step_tuple = self.env.step(action)
 
  obs, reward, terminated, truncated, info = step_tuple
@@ -93,6 +144,12 @@ class DeepCopyMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
 
 
  def rollout(self) -> float:
+ """
+ Performs a rollout from the current state of the environment and returns the return (sum of rewards) of the
+ rollout.
+
+ :return: the return of the rollout
+ """
  log.debug("performing rollout")
  # random rollout
  # perform random valid actions until terminal
gymcts/gymcts_distributed_agent.py CHANGED
@@ -118,6 +118,7 @@ class DistributedGymctsAgent:
  render_tree_after_step: bool = False,
  render_tree_max_depth: int = 2,
  num_parallel: int = 4,
+ clear_mcts_tree_after_step: bool = False,
  number_of_simulations_per_step: int = 25,
  exclude_unvisited_nodes_from_render: bool = False
  ):
@@ -134,6 +135,7 @@ class DistributedGymctsAgent:
  self.number_of_simulations_per_step = number_of_simulations_per_step
 
  self.env = env
+ self.clear_mcts_tree_after_step = clear_mcts_tree_after_step
 
  self.search_root_node = GymctsNode(
  action=None,
@@ -206,6 +208,8 @@ class DistributedGymctsAgent:
  ready_node = ray.get(ready_node_ref)
 
  # merge the tree
+ if not self.clear_mcts_tree_after_step:
+ self.backpropagation(search_start_node, ready_node.mean_value, ready_node.visit_count)
  search_start_node = merge_nodes(search_start_node, ready_node)
 
  action = search_start_node.get_best_action()
@@ -217,22 +221,34 @@ class DistributedGymctsAgent:
  tree_max_depth=self.render_tree_max_depth
  )
 
-
- # to clear memory we need to remove all nodes except the current node
- # this is done by setting the root node to the current node
- # and setting the parent of the current node to None
- # we also need to reset the children of the current node
- # this is done by calling the reset method
- #
- # in a distributed setting we need we delete all previous nodes
- # this is because backpropagation merging trees is already computationally expensive
- # and backpropagating the whole tree would be even more expensive
- next_node.reset()
+ if self.clear_mcts_tree_after_step:
+ # to clear memory we need to remove all nodes except the current node
+ # this is done by setting the root node to the current node
+ # and setting the parent of the current node to None
+ # we also need to reset the children of the current node
+ # this is done by calling the reset method
+ next_node.reset()
 
  self.search_root_node = next_node
 
  return action, next_node
 
+ def backpropagation(self, node: GymctsNode, average_episode_return: float, num_episodes: int) -> None:
+ log.debug(f"performing backpropagation from leaf node: {node}")
+ while not node.is_root():
+ node.mean_value = (node.mean_value * node.visit_count + average_episode_return * num_episodes) / (
+ node.visit_count + num_episodes)
+ node.visit_count += num_episodes
+ node.max_value = max(node.max_value, average_episode_return)
+ node.min_value = min(node.min_value, average_episode_return)
+ node = node.parent
+ # also update root node
+ node.mean_value = (node.mean_value * node.visit_count + average_episode_return * num_episodes) / (
+ node.visit_count + num_episodes)
+ node.visit_count += num_episodes
+ node.max_value = max(node.max_value, average_episode_return)
+ node.min_value = min(node.min_value, average_episode_return)
+
  def show_mcts_tree(self, start_node: GymctsNode = None, tree_max_depth: int = None) -> None:
 
  if start_node is None:
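The mean update in backpropagation is a weighted average of the node's existing statistics and the merged episodes. With illustrative numbers:

```python
# Merging k new episodes into a node's running statistics (illustrative values):
mean_value, visit_count = 0.5, 10        # node's current statistics
avg_return, num_episodes = 0.8, 5        # statistics of the merged episodes

new_mean = (mean_value * visit_count + avg_return * num_episodes) / (visit_count + num_episodes)
# (0.5 * 10 + 0.8 * 5) / 15 = 9.0 / 15 = 0.6
```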
@@ -268,7 +284,7 @@ if __name__ == '__main__':
  agent1 = DistributedGymctsAgent(
  env=env,
  render_tree_after_step=True,
- number_of_simulations_per_step=1000,
+ number_of_simulations_per_step=10,
  exclude_unvisited_nodes_from_render=True,
  num_parallel=1,
  )
@@ -278,4 +294,6 @@ if __name__ == '__main__':
  actions = agent1.solve()
  end_time = time.perf_counter()
 
+ agent1.show_mcts_tree_from_root()
+
  print(f"solution time per action: {end_time - start_time}/{len(actions)}")
gymcts/gymcts_env_abc.py CHANGED
@@ -9,20 +9,53 @@ class GymctsABC(ABC, gym.Env):
 
  @abstractmethod
  def get_state(self) -> Any:
+ """
+ Returns the current state of the environment. The state can in principle be any datatype that allows restoring
+ the environment to the same state. The state is used to restore the environment using the load_state method.
+
+ It's recommended to use a numpy array if possible, as it is easy to serialize and deserialize.
+
+ :return: the current state of the environment
+ """
  pass
 
  @abstractmethod
  def load_state(self, state: Any) -> None:
+ """
+ Loads the state of the environment, restoring the environment to the state captured by get_state.
+
+ :param state: the state to load
+ :return: None
+ """
  pass
 
  @abstractmethod
  def is_terminal(self) -> bool:
+ """
+ Returns True if the environment is in a terminal state, False otherwise.
+ :return: True if the environment is in a terminal state, False otherwise
+ """
  pass
 
  @abstractmethod
  def get_valid_actions(self) -> list[int]:
+ """
+ Returns a list of valid actions for the current state of the environment.
+ This is used to obtain potential actions/subsequent states for the MCTS tree.
+ :return: the list of valid actions
+ """
  pass
 
  @abstractmethod
  def rollout(self) -> float:
+ """
+ Performs a rollout from the current state of the environment and returns the return (sum of rewards) of the rollout.
+
+ Please make sure the return value is in the interval [-1, 1].
+ Otherwise, the MCTS algorithm will not work as expected (due to an ill-fitted exploration coefficient;
+ exploration and exploitation will not be well balanced).
+
+ :return: the return of the rollout
+ """
  pass
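To make the contract concrete, here is a hedged sketch of a toy deterministic environment implementing the five abstract methods. It is illustrative only, not part of the package, and skips the usual gym observation/reward plumbing:

```python
import random
import gymnasium as gym
from gymcts.gymcts_env_abc import GymctsABC

class CountdownEnv(GymctsABC):
    """Toy task: count down from 10 to exactly 0 with steps of size 1 or 2."""

    def __init__(self):
        self.remaining = 10
        self.action_space = gym.spaces.Discrete(2)  # action i means a step of i + 1

    def get_state(self) -> int:
        return self.remaining  # a plain int is enough to restore this env

    def load_state(self, state: int) -> None:
        self.remaining = state

    def is_terminal(self) -> bool:
        return self.remaining <= 0

    def get_valid_actions(self) -> list[int]:
        return [0, 1] if self.remaining >= 2 else [0]

    def rollout(self) -> float:
        # Random rollout; returns stay in [-1, 1] as the docstring above recommends.
        while not self.is_terminal():
            self.remaining -= random.choice(self.get_valid_actions()) + 1
        return 1.0 if self.remaining == 0 else -1.0
```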
gymcts/gymcts_node.py CHANGED
@@ -13,18 +13,32 @@ TGymctsNode = TypeVar("TGymctsNode", bound="GymctsNode")
 
 
  class GymctsNode:
  # static properties
- best_action_weight: float = 0.05
- ubc_c = 0.707
+ best_action_weight: float = 0.05 # weight for the best action
+ ubc_c = 0.707 # exploration coefficient
+
+
 
  # attributes
- visit_count: int = 0
- mean_value: float = 0
- max_value: float = -float("inf")
- min_value: float = +float("inf")
- terminal: bool = False
- state: Any
+ #
+ # Note that these attributes are not static. They are defined here to give developers a hint what fields
+ # are available in the class. They are not static because they are not shared between instances of the
+ # class in the scope of this library.
+ visit_count: int = 0 # number of times the node has been visited
+ mean_value: float = 0 # mean value of the node
+ max_value: float = -float("inf") # maximum value of the node
+ min_value: float = +float("inf") # minimum value of the node
+ terminal: bool = False # whether the node is terminal or not
+ state: Any = None # state of the node
 
  def __str__(self, colored=False, action_space_n=None) -> str:
+ """
+ Returns a string representation of the node. The string representation is used for visualisation purposes,
+ for example in the MCTS tree visualisation functionality.
+
+ :param colored: True if the string representation should be colored, False otherwise. (True is used by the MCTS tree visualisation.)
+ :param action_space_n: the number of actions in the action space. This is used for coloring the action in the string representation.
+ :return: a potentially colored string representation of the node.
+ """
  if not colored:
 
  if not self.is_root():
@@ -72,22 +86,44 @@
  (f", {p}ubc{e}={colorful_value(self.ucb_score())})" if not self.is_root() else ")"))
 
  def traverse_nodes(self) -> Generator[TGymctsNode, None, None]:
+ """
+ Traverses the tree and yields all nodes in the tree.
+
+ :return: a generator that yields all nodes in the tree.
+ """
  yield self
  if self.children:
  for child in self.children.values():
  yield from child.traverse_nodes()
 
  def get_root(self) -> TGymctsNode:
+ """
+ Returns the root node of the tree. The root node is the node that has no parent.
+
+ :return: the root node of the tree.
+ """
  if self.is_root():
  return self
  return self.parent.get_root()
 
  def max_tree_depth(self):
+ """
+ Returns the maximum depth of the tree. The depth of a node is the number of edges from
+ the node to the root node.
+
+ :return: the maximum depth of the tree.
+ """
  if self.is_leaf():
  return 0
  return 1 + max(child.max_tree_depth() for child in self.children.values())
 
  def n_children_recursively(self):
+ """
+ Returns the number of descendants of the node: its direct children plus, recursively,
+ the descendants of each child.
+
+ :return: the number of descendants of the node.
+ """
  if self.is_leaf():
  return 0
  return len(self.children) + sum(child.n_children_recursively() for child in self.children.values())
@@ -97,6 +133,14 @@
  parent: TGymctsNode | None,
  env_reference: GymctsABC,
  ):
+ """
+ Initializes the node with the state of the environment, the action that was taken to reach the node,
+ the parent node, and the environment reference.
+
+ :param action: the action that was taken to reach the node. If the node is a root node, this parameter is None.
+ :param parent: the parent node of the node. If the node is a root node, this parameter is None.
+ :param env_reference: a reference to the environment. The environment is used to get the state of the node and the valid actions.
+ """
 
  # field depending on whether the node is a root node or not
  self.action: int | None
@@ -149,21 +193,49 @@
  self.parent.reset()
 
  def is_root(self) -> bool:
+ """
+ Returns True if the node is a root node. A root node is a node that has no parent.
+
+ :return: True if the node is a root node, False otherwise.
+ """
  return self.parent is None
 
  def is_leaf(self) -> bool:
+ """
+ Returns True if the node is a leaf node. A leaf node is a node that has no children.
+
+ :return: True if the node is a leaf node, False otherwise.
+ """
  return self.children is None or len(self.children) == 0
 
  def get_random_child(self) -> TGymctsNode:
+ """
+ Returns a child of the node selected uniformly at random from the list of children.
+
+ :return: a randomly selected child of the node.
+ """
  if self.is_leaf():
  raise ValueError("cannot get random child of leaf node") # todo: maybe return self instead?
 
  return list(self.children.values())[random.randint(0, len(self.children) - 1)]
 
  def get_best_action(self) -> int:
+ """
+ Returns the best action of the node: the action leading to the child with the highest score,
+ as calculated by the get_score() method.
+
+ :return: the best action of the node.
+ """
  return max(self.children.values(), key=lambda child: child.get_score()).action
 
  def get_score(self) -> float: # todo: make it an attribute?
+ """
+ Returns the score of the node. The score is calculated from the mean value and the maximum value of the node
+ using the formula: score = (1 - a) * mean_value + a * max_value
+ where a is the best action weight.
+
+ :return: the score of the node.
+ """
  # return self.mean_value
  assert 0 <= GymctsNode.best_action_weight <= 1
  a = GymctsNode.best_action_weight
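Plugging illustrative numbers into the score formula from the docstring:

```python
# score = (1 - a) * mean_value + a * max_value, with a = best_action_weight
a = 0.05
mean_value, max_value = 0.6, 0.9
score = (1 - a) * mean_value + a * max_value  # 0.95 * 0.6 + 0.05 * 0.9 = 0.615
```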
@@ -173,6 +245,11 @@
  return self.mean_value
 
  def get_max_value(self) -> float:
+ """
+ Returns the maximum value of the node.
+
+ :return: the maximum value of the node.
+ """
  return self.max_value
 
  def ucb_score(self):
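The body of ucb_score is not part of this diff. For reference, the exploration coefficient ubc_c = 0.707 (roughly 1/sqrt(2)) matches the standard UCT formula, sketched below under that assumption; treat it as illustrative, not as the package's exact implementation:

```python
import math

def uct_score(mean_value: float, visit_count: int, parent_visit_count: int,
              c: float = 0.707) -> float:
    # Standard UCT: exploitation term plus an exploration bonus that shrinks
    # as the node is visited more often relative to its parent.
    return mean_value + c * math.sqrt(math.log(parent_visit_count) / visit_count)
```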
gymcts/gymcts_tree_plotter.py CHANGED
@@ -1,3 +1,5 @@
+ from typing import Any, Generator
+
  from gymcts.gymcts_node import GymctsNode
 
  from gymcts.logger import log
@@ -9,7 +11,19 @@ def _generate_mcts_tree(
  depth: int = None,
  exclude_unvisited_nodes_from_render: bool = True,
  action_space_n: int = None
- ) -> list[str]:
+ ) -> Generator[str, Any | None, None]:
+ """
+ Generates a tree representation of the MCTS tree starting from the given node.
+
+ This is a recursive function that yields the rendered tree line by line.
+
+ :param start_node: the node to start from
+ :param prefix: used to format the tree
+ :param depth: used to limit the depth of the tree
+ :param exclude_unvisited_nodes_from_render: used to exclude unvisited nodes from the render
+ :param action_space_n: the number of actions in the action space
+ :return: a generator of strings representing the tree
+ """
  if prefix is None:
  prefix = ""
  import gymcts.colorful_console_utils as ccu
@@ -70,6 +84,13 @@ def show_mcts_tree(
  tree_max_depth: int = None,
  action_space_n: int = None
  ) -> None:
+ """
+ Renders the MCTS tree starting from the given node.
+
+ :param start_node: the node to start from
+ :param tree_max_depth: the maximum depth of the tree to render
+ :param action_space_n: the number of actions in the action space
+ """
  print(start_node.__str__(colored=True, action_space_n=action_space_n))
  for line in _generate_mcts_tree(start_node=start_node, depth=tree_max_depth):
  print(line)
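A hedged usage sketch of the renderer, assuming an agent whose search_root_node is a GymctsNode (as seen elsewhere in this diff):

```python
from gymcts.gymcts_tree_plotter import show_mcts_tree

# 'agent' is assumed to be a previously constructed GymctsAgent.
# Print the first two levels of the search tree below the root node.
show_mcts_tree(agent.search_root_node, tree_max_depth=2)
```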
gymcts/logger.py CHANGED
@@ -18,10 +18,7 @@ banner_sw = f"""
  ▟█▛ ▜██▛ ▟█▛██▛██▛▟█▛ ▟█▛ ▜███▙
  ▟█▛ ▟█▛ ▟█▛ ▟█▛ ▟█▛▟█▛ ▟█▛ ▟█▛
  ▜████▛ ▟█▛ ▟█▛ ▟█▛ ▜████▛ ▟█▛ ▟████▛
-
-
-
-
+
  """
 
 
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gymcts
- Version: 1.2.0
+ Version: 1.2.1
  Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems formulated as gymnasium reinforcement learning environments.
  Author: Alexander Nasuta
  Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>
@@ -47,7 +47,7 @@ Requires-Dist: graph-matrix-jsp-env; extra == "examples"
  Requires-Dist: graph-jsp-env; extra == "examples"
  Provides-Extra: dev
  Requires-Dist: jsp-instance-utils; extra == "dev"
- Requires-Dist: graph-matrix-jsp-env; extra == "dev"
+ Requires-Dist: graph-matrix-jsp-env>=0.3.0; extra == "dev"
  Requires-Dist: graph-jsp-env; extra == "dev"
  Requires-Dist: JSSEnv; extra == "dev"
  Requires-Dist: pip-tools; extra == "dev"
@@ -59,21 +59,24 @@ Requires-Dist: stable_baselines3; extra == "dev"
  Requires-Dist: sphinx; extra == "dev"
  Requires-Dist: myst-parser; extra == "dev"
  Requires-Dist: sphinx-autobuild; extra == "dev"
+ Requires-Dist: sphinx-copybutton; extra == "dev"
  Requires-Dist: furo; extra == "dev"
  Requires-Dist: twine; extra == "dev"
  Requires-Dist: sphinx-copybutton; extra == "dev"
  Requires-Dist: nbsphinx; extra == "dev"
+ Requires-Dist: pandoc; extra == "dev"
  Requires-Dist: jupytext; extra == "dev"
  Requires-Dist: jupyter; extra == "dev"
+ Requires-Dist: typing_extensions>=4.12.0; extra == "dev"
  Dynamic: license-file
 
  # GYMCTS
 
  A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.
 
- - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv)
- - Pypi: [GYMCTS on PyPi](https://pypi.org/project/graph-matrix-jsp-env/)
- - Documentation: [GYMCTS Docs](https://graphmatrixjobshopenv.readthedocs.io/en/latest/)
+ - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
+ - Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
+ - Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)
 
  ## Description
 
@@ -101,22 +104,26 @@ The usage of a MCTS agent can roughly be organised into the following steps:
  - Render the solution
 
  The GYMCTS package provides two types of wrappers for Gymnasium-style environments:
- - `NaiveSoloMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
- - `DeterministicSoloMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.
+ - `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
+ - `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that led to the current state in the MCTS node.
 
- These wrappers can be used with the `SoloMCTSAgent` to solve the environment.
- The wrapper implement methods that are required by the `SoloMCTSAgent` to interact with the environment.
+ These wrappers can be used with the `GymctsAgent` to solve the environment.
+ The wrappers implement the methods that are required by the `GymctsAgent` to interact with the environment.
  GYMCTS is designed to use a single environment instance and to reconstruct the environment state from a state snapshot when needed.
 
  NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
  Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
+ ```python
+ env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
+ ```
 
- NormalizeReward(env, gamma=0.99, epsilon=1e-8)
- env = TransformReward(env, lambda r: r / 36)
- ### FrozenLake Example (NaiveSoloMCTSGymEnvWrapper)
+ ```python
+ env = TransformReward(env, lambda r: r / n_steps_per_episode)
+ ```
+ ### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)
 
  A minimal example of how to use the package with the FrozenLake environment and the DeepCopyMCTSGymEnvWrapper is provided in the code snippet below.
- The NaiveSoloMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
+ The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
 
  ```python
  import gymnasium as gym
@@ -135,7 +142,7 @@ if __name__ == '__main__':
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
  env.reset()
 
- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
  env = DeepCopyMCTSGymEnvWrapper(env)
 
  # 2. create the agent
@@ -158,7 +165,7 @@
 
  # 5. print the solution
  # read the solution from the info provided by the RecordEpisodeStatistics wrapper
- # (that NaiveSoloMCTSGymEnvWrapper uses internally)
+ # (that DeepCopyMCTSGymEnvWrapper uses internally)
  episode_length = info["episode"]["l"]
  episode_return = info["episode"]["r"]
 
@@ -251,7 +258,7 @@
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
  env.reset()
 
- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
  env = DeepCopyMCTSGymEnvWrapper(env)
 
  # 2. create the agent
@@ -280,7 +287,7 @@
  env.close()
 
  # 5. print the solution
- # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that NaiveSoloMCTSGymEnvWrapper wraps internally)
+ # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
  episode_length = info["episode"]["l"]
  episode_return = info["episode"]["r"]
 
@@ -321,13 +328,13 @@ import gymnasium as gym
  from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
  from jsp_instance_utils.instances import ft06, ft06_makespan
 
- from gymcts.gymcts_agent import SoloMCTSAgent
- from gymcts.gymcts_gym_env import SoloMCTSGymEnv
+ from gymcts.gymcts_agent import GymctsAgent
+ from gymcts.gymcts_env_abc import GymctsABC
 
  from gymcts.logger import log
 
 
- class GraphJspGYMCTSWrapper(SoloMCTSGymEnv, gym.Wrapper):
+ class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):
 
  def __init__(self, env: DisjunctiveGraphJspEnv):
  gym.Wrapper.__init__(self, env)
@@ -378,7 +385,7 @@ if __name__ == '__main__':
 
  env = GraphJspGYMCTSWrapper(env)
 
- agent = SoloMCTSAgent(
+ agent = GymctsAgent(
  env=env,
  clear_mcts_tree_after_step=True,
  render_tree_after_step=True,
@@ -421,7 +428,6 @@ import gymnasium as gym
 
  from gymcts.gymcts_agent import GymctsAgent
  from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
- from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
 
  from gymcts.logger import log
 
@@ -434,7 +440,7 @@
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
  env.reset()
 
- # wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # wrap the environment with the action history wrapper or a custom gymcts wrapper
  env = ActionHistoryMCTSGymEnvWrapper(env)
 
  # create the agent
@@ -505,11 +511,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,
 
  using https:
  ```shell
- git clone https://github.com/Alexander-Nasuta/todo
+ git clone https://github.com/Alexander-Nasuta/gymcts.git
  ```
  or by using the GitHub CLI:
  ```shell
- gh repo clone Alexander-Nasuta/todo
+ gh repo clone Alexander-Nasuta/gymcts
  ```
 
  if you are using PyCharm, I recommend doing the following additional steps:
@@ -518,9 +524,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
  - mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
  - mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)
 
- at the end your project structure should look like this:
-
- todo
 
  ### Create a Virtual Environment (optional)
 
@@ -586,12 +589,6 @@ For testing with `tox` run the following command:
  tox
  ```
 
- Here is a screenshot of what the output might look like:
-
- ![](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv/raw/master/resources/tox-screenshot.png)
-
- Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
-
 
  ### Building and Publishing the Project to PyPi
  In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
@@ -630,7 +627,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
  This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).
 
 
-
  ## Contact
 
  If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on the repository.
@@ -0,0 +1,15 @@
+ gymcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gymcts/colorful_console_utils.py,sha256=n7nymC8kKZnA_8nXcdn201NAzjZjgEHfKpbBcnl4oAE,5891
+ gymcts/gymcts_action_history_wrapper.py,sha256=7-p17Fgb80SRCBaCm6G8SJrEPsl2Y4aIO3InviuQP08,6993
+ gymcts/gymcts_agent.py,sha256=f2imP-Wv-E7EYE0-iWd86hY9cx-rqHZMlDusp-aE-ps,8698
+ gymcts/gymcts_deepcopy_wrapper.py,sha256=lCCT5-6JVCwUCP__4uPMMkT5HnO2JWm2ebzJ69zXp9c,6792
+ gymcts/gymcts_distributed_agent.py,sha256=Ha9UBQvFjoErfMWvPyN0JcTYz-JaiJ4eWjLMikp9Yhs,11569
+ gymcts/gymcts_env_abc.py,sha256=U1mPz0NWZZL1sdHX7oUP1UFKtmbHwyqHQOQidyh_Uck,2107
+ gymcts/gymcts_node.py,sha256=pxjY2Zb0kPuFQ5mWEs0ct3qXoyB47NZK7h2ZGbLJbRA,11052
+ gymcts/gymcts_tree_plotter.py,sha256=PR6C7q9Q4kuz1aLGyD7-aZsxk3RqlHZpOqmOiRpCyK0,3547
+ gymcts/logger.py,sha256=RI7B9cvbBGrj0_QIAI77wihzuu2tPG_-z9GM2Mw5aHE,926
+ gymcts-1.2.1.dist-info/licenses/LICENSE,sha256=UGe75WojDiw_77SEnK2aysEDlElRlkWie7U7NaAFx00,1072
+ gymcts-1.2.1.dist-info/METADATA,sha256=wUJEcWrAvdC42kl59qewCN5tK3DKMLxGWcCipnOX4pQ,23371
+ gymcts-1.2.1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+ gymcts-1.2.1.dist-info/top_level.txt,sha256=E8MoLsPimUPD0H1Y6lum4TVe-lhSDAyBAXGrkYIT52w,7
+ gymcts-1.2.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (78.0.2)
+ Generator: setuptools (79.0.1)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
@@ -1,15 +0,0 @@
- gymcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gymcts/colorful_console_utils.py,sha256=OhULcXHKbEA4uJDAEYCTcW6wUv0LsHX_XSYzZ_Szsv4,4553
- gymcts/gymcts_action_history_wrapper.py,sha256=AjvBBwd1t9-nTYP09aMdlScAkFNXf5vOagejpjWYOPo,3810
- gymcts/gymcts_agent.py,sha256=O2y98jKFjR5TzqVV7DO1jlcYDyzAgd_H2RF4-w4NP0g,8499
- gymcts/gymcts_deepcopy_wrapper.py,sha256=OleQTnvxv3gLEo8-2asyeo-CpZ4HEbgyFGS5DTCD7NM,4167
- gymcts/gymcts_distributed_agent.py,sha256=M7dyBfC8u3M99PJFoXKgIc_CPTyHGppmktkH-y9ci4U,10448
- gymcts/gymcts_env_abc.py,sha256=7nCRiiClmmVLX-d_Q1dxeztmuvmAtmWZwjT81zrG1_w,575
- gymcts/gymcts_node.py,sha256=PT_YZFwt1zjuvd8i9Wb5LEkHAqmJOFyPDp3GFD05lqM,7138
- gymcts/gymcts_tree_plotter.py,sha256=eg207wHcDepwWODXzmDYQn1Aai29Cs4jFS1HNvAhlXs,2651
- gymcts/logger.py,sha256=nAkUa4djiuCR7hF0EUsplhqFHCp76QcOX1cV3lIPzOI,937
- gymcts-1.2.0.dist-info/licenses/LICENSE,sha256=UGe75WojDiw_77SEnK2aysEDlElRlkWie7U7NaAFx00,1072
- gymcts-1.2.0.dist-info/METADATA,sha256=zhEIFo0rOnv5hCv6ukImkq-9nshO4EfXMbHlhNlYhyA,23640
- gymcts-1.2.0.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
- gymcts-1.2.0.dist-info/top_level.txt,sha256=E8MoLsPimUPD0H1Y6lum4TVe-lhSDAyBAXGrkYIT52w,7
- gymcts-1.2.0.dist-info/RECORD,,