PyPI - gymcts - Versions diffs - 1.3.0__tar.gz → 1.4.0__tar.gz - Mend

gymcts 1.3.0.tar.gz → 1.4.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{gymcts-1.3.0/src/gymcts.egg-info → gymcts-1.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gymcts
-Version: 1.3.0
+Version: 1.4.0
 Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
 Author: Alexander Nasuta
 Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>

{gymcts-1.3.0 → gymcts-1.4.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "gymcts"
-version = "1.3.0"
+version = "1.4.0"
 description = "A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments."
 readme = "README.md"
 authors = [{ name = "Alexander Nasuta", email = "alexander.nasuta@wzl-iqs.rwth-aachen.de" }]

{gymcts-1.3.0 → gymcts-1.4.0}/src/gymcts/gymcts_agent.py RENAMED Viewed

@@ -49,6 +49,7 @@ class GymctsAgent:
                  calc_number_of_simulations_per_step: Callable[[int,int], int] = None,
                  score_variate: Literal["UCT_v0", "UCT_v1", "UCT_v2",] = "UCT_v0",
                  best_action_weight=None,
+                 keep_whole_tree_till_initial_root: bool = False,
                  ):
         # check if action space of env is discrete
         if not isinstance(env.action_space, gym.spaces.Discrete):
@@ -79,6 +80,7 @@ class GymctsAgent:
         self.env = env
         self.clear_mcts_tree_after_step = clear_mcts_tree_after_step
+        self.keep_whole_tree_till_initial_root = keep_whole_tree_till_initial_root
         self.search_root_node = GymctsNode(
             action=None,
@@ -190,7 +192,7 @@ class GymctsAgent:
             # we also need to reset the children of the current node
             # this is done by calling the reset method
             next_node.reset()
-        else:
+        elif not self.keep_whole_tree_till_initial_root:
             next_node.remove_parent()
         self.search_root_node = next_node

{gymcts-1.3.0 → gymcts-1.4.0}/src/gymcts/gymcts_neural_agent.py RENAMED Viewed

@@ -162,37 +162,43 @@ class GymctsNeuralNode(GymctsNode):
         p_sa = self._selection_score_prior
         n_s = self.parent.visit_count
         n_sa = self.visit_count
+        assert 0 <= GymctsNode.best_action_weight <= 1
+        b = GymctsNode.best_action_weight
+        exploitation_term = 0.0 if self.visit_count == 0 else (1 - b) * self.mean_value + b * self.max_value
         if GymctsNeuralNode.score_variate == "PUCT_v0":
-            return self.mean_value + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
+            return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
         elif GymctsNeuralNode.score_variate == "PUCT_v1":
-            return self.mean_value + c * p_sa * math.sqrt(2 * math.log(n_s) / (n_sa))
+            return exploitation_term + c * p_sa * math.sqrt(2 * math.log(n_s) / (n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v2":
-            return self.mean_value + c * p_sa * math.sqrt(n_s) / n_sa
+            return exploitation_term + c * p_sa * math.sqrt(n_s) / n_sa
         elif GymctsNeuralNode.score_variate == "PUCT_v3":
-            return self.mean_value + c * (p_sa ** GymctsNeuralNode.PUCT_v3_mu) * math.sqrt(n_s / (1 + n_sa))
+            return exploitation_term + c * (p_sa ** GymctsNeuralNode.PUCT_v3_mu) * math.sqrt(n_s / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v4":
-            return self.mean_value + c * (p_sa / (1 + n_sa))
+            return exploitation_term + c * (p_sa / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v5":
-            return self.mean_value + c * p_sa * (math.sqrt(n_s) + 1) / (n_sa + 1)
+            return exploitation_term + c * p_sa * (math.sqrt(n_s) + 1) / (n_sa + 1)
         elif GymctsNeuralNode.score_variate == "PUCT_v6":
-            return self.mean_value + c * p_sa * n_s / (1 + n_sa)
+            return exploitation_term + c * p_sa * n_s / (1 + n_sa)
         elif GymctsNeuralNode.score_variate == "PUCT_v7":
             epsilon = 1e-8
-            return self.mean_value + c * p_sa * (math.sqrt(n_s) + epsilon) / (n_sa + 1)
+            return exploitation_term + c * p_sa * (math.sqrt(n_s) + epsilon) / (n_sa + 1)
         elif GymctsNeuralNode.score_variate == "PUCT_v8":
-            return self.mean_value + c * p_sa * math.sqrt((math.log(n_s) + 1) / (1 + n_sa))
+            return exploitation_term + c * p_sa * math.sqrt((math.log(n_s) + 1) / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v9":
-            return self.mean_value + c * p_sa * math.sqrt(n_s / (1 + n_sa))
+            return exploitation_term + c * p_sa * math.sqrt(n_s / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v10":
-            return self.mean_value + c * p_sa * math.sqrt(math.log(n_s) / (1 + n_sa))
+            return exploitation_term + c * p_sa * math.sqrt(math.log(n_s) / (1 + n_sa))
         elif GymctsNeuralNode.score_variate == "MuZero_v0":
             c1 = GymctsNeuralNode.MuZero_c1
             c2 = GymctsNeuralNode.MuZero_c2
-            return self.mean_value + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
+            return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
         elif GymctsNeuralNode.score_variate == "MuZero_v1":
             c1 = GymctsNeuralNode.MuZero_c1
             c2 = GymctsNeuralNode.MuZero_c2
-            return self.mean_value + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
+            return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
         exploration_term = self._selection_score_prior * c * math.sqrt(math.log(self.parent.visit_count) / (self.visit_count)) if self.visit_count > 0 else float("inf")

{gymcts-1.3.0 → gymcts-1.4.0}/src/gymcts/gymcts_node.py RENAMED Viewed

@@ -326,16 +326,20 @@ class GymctsNode:
             raise ValueError("ucb_score can only be called on non-root nodes")
         c = GymctsNode.ubc_c # default is 0.707
+        assert 0 <= GymctsNode.best_action_weight <= 1
+        b = GymctsNode.best_action_weight
+        exploitation_term = 0.0 if self.visit_count == 0 else (1 - b) * self.mean_value + b * self.max_value
         if GymctsNode.score_variate == "UCT_v0":
             if self.visit_count == 0:
                 return float("inf")
-            return self.mean_value + c * math.sqrt( 2 * math.log(self.parent.visit_count) / (self.visit_count))
+            return exploitation_term + c * math.sqrt( 2 * math.log(self.parent.visit_count) / (self.visit_count))
         if GymctsNode.score_variate == "UCT_v1":
-            return self.mean_value + c * math.sqrt( math.log(self.parent.visit_count) / (1 + self.visit_count))
+            return exploitation_term + c * math.sqrt( math.log(self.parent.visit_count) / (1 + self.visit_count))
         if GymctsNode.score_variate == "UCT_v2":
-            return self.mean_value + c * math.sqrt(self.parent.visit_count) / (1 + self.visit_count)
+            return exploitation_term + c * math.sqrt(self.parent.visit_count) / (1 + self.visit_count)
         raise ValueError(f"unknown score variate: {GymctsNode.score_variate}. ")

{gymcts-1.3.0 → gymcts-1.4.0/src/gymcts.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gymcts
-Version: 1.3.0
+Version: 1.4.0
 Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
 Author: Alexander Nasuta
 Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>

{gymcts-1.3.0 → gymcts-1.4.0}/tests/test_number_of_visits.py RENAMED Viewed

@@ -4,7 +4,7 @@ from gymcts.gymcts_agent import GymctsAgent
 def test_number_of_visits_without_clearing_root(graph_matrix_env_naive_wrapper_singe_job_jsp_instance):
     env = graph_matrix_env_naive_wrapper_singe_job_jsp_instance
-    agent = GymctsAgent(env=env, clear_mcts_tree_after_step=False)
+    agent = GymctsAgent(env=env, clear_mcts_tree_after_step=False, keep_whole_tree_till_initial_root=True)
     assert agent.search_root_node.visit_count == 0
     agent.vanilla_mcts_search(search_start_node=agent.search_root_node, num_simulations=10)
@@ -20,7 +20,7 @@ def test_number_of_visits_without_clearing_root(graph_matrix_env_naive_wrapper_s
 def test_number_of_visits_without_clearing(graph_matrix_env_naive_wrapper_singe_job_jsp_instance):
     env = graph_matrix_env_naive_wrapper_singe_job_jsp_instance
-    agent = GymctsAgent(env=env, clear_mcts_tree_after_step=False)
+    agent = GymctsAgent(env=env, clear_mcts_tree_after_step=False, keep_whole_tree_till_initial_root=True)
     assert agent.search_root_node.visit_count == 0
     actions = agent.solve(num_simulations_per_step=10)
@@ -43,7 +43,7 @@ def test_number_of_visits_without_clearing_root_dynamic_step_size(graph_matrix_e
     env = graph_matrix_env_naive_wrapper_singe_job_jsp_instance
-    agent = GymctsAgent(env=env, clear_mcts_tree_after_step=False)
+    agent = GymctsAgent(env=env, clear_mcts_tree_after_step=False, keep_whole_tree_till_initial_root=True)
     tree_root = agent.search_root_node
@@ -89,6 +89,7 @@ def test_number_of_visits_with_clearing_root2(graph_matrix_env_naive_wrapper_two
     agent = GymctsAgent(
         env=env,
         clear_mcts_tree_after_step=False,
+        keep_whole_tree_till_initial_root=True,
         number_of_simulations_per_step=50
     )