PyPI - gymcts - Versions diffs - 1.3.0__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

gymcts-1.3.0-py3-none-any.whl → gymcts-1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

gymcts/gymcts_agent.py CHANGED Viewed

@@ -49,6 +49,7 @@ class GymctsAgent:
                  calc_number_of_simulations_per_step: Callable[[int,int], int] = None,
                  score_variate: Literal["UCT_v0", "UCT_v1", "UCT_v2",] = "UCT_v0",
                  best_action_weight=None,
+                 keep_whole_tree_till_initial_root: bool = False,
                  ):
         # check if action space of env is discrete
         if not isinstance(env.action_space, gym.spaces.Discrete):
@@ -79,6 +80,7 @@ class GymctsAgent:
         self.env = env
         self.clear_mcts_tree_after_step = clear_mcts_tree_after_step
+        self.keep_whole_tree_till_initial_root = keep_whole_tree_till_initial_root
         self.search_root_node = GymctsNode(
             action=None,
@@ -190,7 +192,7 @@ class GymctsAgent:
             # we also need to reset the children of the current node
             # this is done by calling the reset method
             next_node.reset()
-        else:
+        elif not self.keep_whole_tree_till_initial_root:
             next_node.remove_parent()
         self.search_root_node = next_node

gymcts/gymcts_neural_agent.py CHANGED Viewed

@@ -162,37 +162,43 @@ class GymctsNeuralNode(GymctsNode):
         p_sa = self._selection_score_prior
         n_s = self.parent.visit_count
         n_sa = self.visit_count
+        assert 0 <= GymctsNode.best_action_weight <= 1
+        b = GymctsNode.best_action_weight
+        exploitation_term = 0.0 if self.visit_count == 0 else (1 - b) * self.mean_value + b * self.max_value
         if GymctsNeuralNode.score_variate == "PUCT_v0":
-            return self.mean_value + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
+            return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
         elif GymctsNeuralNode.score_variate == "PUCT_v1":
-            return self.mean_value + c * p_sa * math.sqrt(2 * math.log(n_s) / (n_sa))
+            return exploitation_term + c * p_sa * math.sqrt(2 * math.log(n_s) / (n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v2":
-            return self.mean_value + c * p_sa * math.sqrt(n_s) / n_sa
+            return exploitation_term + c * p_sa * math.sqrt(n_s) / n_sa
         elif GymctsNeuralNode.score_variate == "PUCT_v3":
-            return self.mean_value + c * (p_sa ** GymctsNeuralNode.PUCT_v3_mu) * math.sqrt(n_s / (1 + n_sa))
+            return exploitation_term + c * (p_sa ** GymctsNeuralNode.PUCT_v3_mu) * math.sqrt(n_s / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v4":
-            return self.mean_value + c * (p_sa / (1 + n_sa))
+            return exploitation_term + c * (p_sa / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v5":
-            return self.mean_value + c * p_sa * (math.sqrt(n_s) + 1) / (n_sa + 1)
+            return exploitation_term + c * p_sa * (math.sqrt(n_s) + 1) / (n_sa + 1)
         elif GymctsNeuralNode.score_variate == "PUCT_v6":
-            return self.mean_value + c * p_sa * n_s / (1 + n_sa)
+            return exploitation_term + c * p_sa * n_s / (1 + n_sa)
         elif GymctsNeuralNode.score_variate == "PUCT_v7":
             epsilon = 1e-8
-            return self.mean_value + c * p_sa * (math.sqrt(n_s) + epsilon) / (n_sa + 1)
+            return exploitation_term + c * p_sa * (math.sqrt(n_s) + epsilon) / (n_sa + 1)
         elif GymctsNeuralNode.score_variate == "PUCT_v8":
-            return self.mean_value + c * p_sa * math.sqrt((math.log(n_s) + 1) / (1 + n_sa))
+            return exploitation_term + c * p_sa * math.sqrt((math.log(n_s) + 1) / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v9":
-            return self.mean_value + c * p_sa * math.sqrt(n_s / (1 + n_sa))
+            return exploitation_term + c * p_sa * math.sqrt(n_s / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v10":
-            return self.mean_value + c * p_sa * math.sqrt(math.log(n_s) / (1 + n_sa))
+            return exploitation_term + c * p_sa * math.sqrt(math.log(n_s) / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "MuZero_v0":
             c1 = GymctsNeuralNode.MuZero_c1
             c2 = GymctsNeuralNode.MuZero_c2
-            return self.mean_value + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
+            return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
         elif GymctsNeuralNode.score_variate == "MuZero_v1":
             c1 = GymctsNeuralNode.MuZero_c1
             c2 = GymctsNeuralNode.MuZero_c2
-            return self.mean_value + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
+            return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
         exploration_term = self._selection_score_prior * c * math.sqrt(math.log(self.parent.visit_count) / (self.visit_count)) if self.visit_count > 0 else float("inf")
@@ -221,7 +227,7 @@ class GymctsNeuralNode(GymctsNode):
         if not colored:
             if not self.is_root():
-                return f"(a={self.action}, N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}, ubc={self.tree_policy_score():.2f})"
+                return f"(a={self.action}, N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}, {GymctsNeuralNode.score_variate}={self.tree_policy_score():.2f})"
             else:
                 return f"(N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}) [root]"

gymcts/gymcts_node.py CHANGED Viewed

@@ -61,7 +61,7 @@ class GymctsNode:
         if not colored:
             if not self.is_root():
-                return f"(a={self.action}, N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}, ubc={self.tree_policy_score():.2f})"
+                return f"(a={self.action}, N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}, {GymctsNode.score_variate}={self.tree_policy_score():.2f})"
             else:
                 return f"(N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}) [root]"
@@ -102,7 +102,7 @@ class GymctsNode:
                  f"{p}N{e}={colorful_value(self.visit_count)}, "
                  f"{p}Q_v{e}={ccu.wrap_with_color_scale(s=mean_val, value=self.mean_value, min_val=root_node.min_value, max_val=root_node.max_value)}, "
                  f"{p}best{e}={colorful_value(self.max_value)}") +
-                (f", {p}ubc{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))
+                (f", {p}{GymctsNode.score_variate}{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))
     def traverse_nodes(self) -> Generator[TGymctsNode, None, None]:
         """
@@ -326,16 +326,20 @@ class GymctsNode:
             raise ValueError("ucb_score can only be called on non-root nodes")
         c = GymctsNode.ubc_c # default is 0.707
+        assert 0 <= GymctsNode.best_action_weight <= 1
+        b = GymctsNode.best_action_weight
+        exploitation_term = 0.0 if self.visit_count == 0 else (1 - b) * self.mean_value + b * self.max_value
         if GymctsNode.score_variate == "UCT_v0":
             if self.visit_count == 0:
                 return float("inf")
-            return self.mean_value + c * math.sqrt( 2 * math.log(self.parent.visit_count) / (self.visit_count))
+            return exploitation_term + c * math.sqrt( 2 * math.log(self.parent.visit_count) / (self.visit_count))
         if GymctsNode.score_variate == "UCT_v1":
-            return self.mean_value + c * math.sqrt( math.log(self.parent.visit_count) / (1 + self.visit_count))
+            return exploitation_term + c * math.sqrt( math.log(self.parent.visit_count) / (1 + self.visit_count))
         if GymctsNode.score_variate == "UCT_v2":
-            return self.mean_value + c * math.sqrt(self.parent.visit_count) / (1 + self.visit_count)
+            return exploitation_term + c * math.sqrt(self.parent.visit_count) / (1 + self.visit_count)
         raise ValueError(f"unknown score variate: {GymctsNode.score_variate}. ")

{gymcts-1.3.0.dist-info → gymcts-1.4.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gymcts
-Version: 1.3.0
+Version: 1.4.1
 Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
 Author: Alexander Nasuta
 Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>

{gymcts-1.3.0.dist-info → gymcts-1.4.1.dist-info}/RECORD RENAMED Viewed

@@ -1,16 +1,16 @@
 gymcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gymcts/colorful_console_utils.py,sha256=n7nymC8kKZnA_8nXcdn201NAzjZjgEHfKpbBcnl4oAE,5891
 gymcts/gymcts_action_history_wrapper.py,sha256=7-p17Fgb80SRCBaCm6G8SJrEPsl2Y4aIO3InviuQP08,6993
-gymcts/gymcts_agent.py,sha256=OAcN2-mFCR2AVJrRZlRtROF_zHk90SIM-uAebKektIc,10768
+gymcts/gymcts_agent.py,sha256=FzMPjHXyKN6enNJubmYEouvb0wBbE1-bpxuLuW4J1gU,10960
 gymcts/gymcts_deepcopy_wrapper.py,sha256=lCCT5-6JVCwUCP__4uPMMkT5HnO2JWm2ebzJ69zXp9c,6792
 gymcts/gymcts_distributed_agent.py,sha256=Ha9UBQvFjoErfMWvPyN0JcTYz-JaiJ4eWjLMikp9Yhs,11569
 gymcts/gymcts_env_abc.py,sha256=iqrFNNSa-kZyAGk1UN2BjkdkV6NufAkYJT8d7PlQ07E,2525
-gymcts/gymcts_neural_agent.py,sha256=urYGA5D6idChPke8Ac9zqhKy2NqkJzt3Zt-j8V6OpuQ,15785
-gymcts/gymcts_node.py,sha256=-YKfK5fryPteCp-UTsAgzFVIBucZdXPMbXHCIb6mS24,13151
+gymcts/gymcts_neural_agent.py,sha256=kP2DwoZ6nM4lUYqePhEvUIAqmZegB0oxQ3uMtMFj-Hk,16049
+gymcts/gymcts_node.py,sha256=KAR5y1MrT8c_7ZXwTuCj77B7DiERDfHplF8avs76JHU,13410
 gymcts/gymcts_tree_plotter.py,sha256=PR6C7q9Q4kuz1aLGyD7-aZsxk3RqlHZpOqmOiRpCyK0,3547
 gymcts/logger.py,sha256=RI7B9cvbBGrj0_QIAI77wihzuu2tPG_-z9GM2Mw5aHE,926
-gymcts-1.3.0.dist-info/licenses/LICENSE,sha256=UGe75WojDiw_77SEnK2aysEDlElRlkWie7U7NaAFx00,1072
-gymcts-1.3.0.dist-info/METADATA,sha256=pyhdSu_PAMi9IbVeSsHU0EcJSasAMttrtz-pKIjbePw,23864
-gymcts-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-gymcts-1.3.0.dist-info/top_level.txt,sha256=E8MoLsPimUPD0H1Y6lum4TVe-lhSDAyBAXGrkYIT52w,7
-gymcts-1.3.0.dist-info/RECORD,,
+gymcts-1.4.1.dist-info/licenses/LICENSE,sha256=UGe75WojDiw_77SEnK2aysEDlElRlkWie7U7NaAFx00,1072
+gymcts-1.4.1.dist-info/METADATA,sha256=DsGxePuo5m6SgNPRjrkuxUK-em2IWBm0b20ET-CVdP0,23864
+gymcts-1.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+gymcts-1.4.1.dist-info/top_level.txt,sha256=E8MoLsPimUPD0H1Y6lum4TVe-lhSDAyBAXGrkYIT52w,7
+gymcts-1.4.1.dist-info/RECORD,,

{gymcts-1.3.0.dist-info → gymcts-1.4.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{gymcts-1.3.0.dist-info → gymcts-1.4.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{gymcts-1.3.0.dist-info → gymcts-1.4.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

gymcts 1.3.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

gymcts-1.3.0-py3-none-any.whl → gymcts-1.4.1-py3-none-any.whl