PyPI - gymcts - Versions diffs - 1.4.2__py3-none-any.whl → 1.4.4__py3-none-any.whl - Mend

gymcts 1.4.2__py3-none-any.whl → 1.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

gymcts/gymcts_neural_agent.py CHANGED Viewed

@@ -55,14 +55,11 @@ class GraphJspNeuralGYMCTSWrapper(GymctsABC, gym.Wrapper):
     def get_state(self) -> Any:
         return env.unwrapped.get_action_history()
     def action_masks(self) -> np.ndarray | None:
         """Return the action mask for the current state."""
         return self.env.unwrapped.valid_action_mask()
 class GymctsNeuralNode(GymctsNode):
     PUCT_v3_mu = 0.95
@@ -146,13 +143,12 @@ class GymctsNeuralNode(GymctsNode):
             env_reference: GymctsABC,
             prior_selection_score: float,
             observation: np.ndarray | None = None,
-        ):
+    ):
         super().__init__(action, parent, env_reference)
         self._obs = observation
         self._selection_score_prior = prior_selection_score
     def tree_policy_score(self) -> float:
         # call the superclass (GymctsNode) for ucb_score
         c = GymctsNode.ubc_c
@@ -167,12 +163,13 @@ class GymctsNeuralNode(GymctsNode):
         b = GymctsNode.best_action_weight
         exploitation_term = 0.0 if self.visit_count == 0 else (1 - b) * self.mean_value + b * self.max_value
         if GymctsNeuralNode.score_variate == "PUCT_v0":
             return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
         elif GymctsNeuralNode.score_variate == "PUCT_v1":
             return exploitation_term + c * p_sa * math.sqrt(2 * math.log(n_s) / (n_sa))
         elif GymctsNeuralNode.score_variate == "PUCT_v2":
+            if n_sa == 0:
+                return float("inf")  # Avoid division by zero
             return exploitation_term + c * p_sa * math.sqrt(n_s) / n_sa
         elif GymctsNeuralNode.score_variate == "PUCT_v3":
             return exploitation_term + c * (p_sa ** GymctsNeuralNode.PUCT_v3_mu) * math.sqrt(n_s / (1 + n_sa))
@@ -200,11 +197,10 @@ class GymctsNeuralNode(GymctsNode):
             c2 = GymctsNeuralNode.MuZero_c2
             return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
-        exploration_term = self._selection_score_prior * c * math.sqrt(math.log(self.parent.visit_count) / (self.visit_count)) if self.visit_count > 0 else float("inf")
+        exploration_term = self._selection_score_prior * c * math.sqrt(
+            math.log(self.parent.visit_count) / (self.visit_count)) if self.visit_count > 0 else float("inf")
         return self.mean_value + exploration_term
     def get_best_action(self) -> int:
         """
         Returns the best action of the node. The best action is the action with the highest score.
@@ -214,7 +210,6 @@ class GymctsNeuralNode(GymctsNode):
         """
         return max(self.children.values(), key=lambda child: child.max_value).action
     def __str__(self, colored=False, action_space_n=None) -> str:
         """
         Returns a string representation of the node. The string representation is used for visualisation purposes.
@@ -263,14 +258,13 @@ class GymctsNeuralNode(GymctsNode):
         root_node = self.get_root()
         mean_val = f"{self.mean_value:.2f}"
         return ((f"("
                  f"{p}a{e}={ccu.wrap_evenly_spaced_color(s=self.action, n_of_item=self.action, n_classes=action_space_n)}, "
                  f"{p}N{e}={colorful_value(self.visit_count)}, "
                  f"{p}Q_v{e}={ccu.wrap_with_color_scale(s=mean_val, value=self.mean_value, min_val=root_node.min_value, max_val=root_node.max_value)}, "
                  f"{p}best{e}={colorful_value(self.max_value)}") +
-                (f", {p}{GymctsNeuralNode.score_variate}{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))
+                (
+                    f", {p}{GymctsNeuralNode.score_variate}{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))
 class GymctsNeuralAgent(GymctsAgent):
@@ -282,15 +276,15 @@ class GymctsNeuralAgent(GymctsAgent):
                  score_variate: Literal[
                      "PUCT_v0",
                      "PUCT_v1",
-                     "PUTC_v2",
-                     "PUTC_v3",
-                     "PUTC_v4",
-                     "PUTC_v5",
-                     "PUTC_v6",
-                     "PUTC_v7",
-                     "PUTC_v8",
-                     "PUTC_v9",
-                     "PUTC_v10",
+                     "PUCT_v2",
+                     "PUCT_v3",
+                     "PUCT_v4",
+                     "PUCT_v5",
+                     "PUCT_v6",
+                     "PUCT_v7",
+                     "PUCT_v8",
+                     "PUCT_v9",
+                     "PUCT_v10",
                      "MuZero_v0",
                      "MuZero_v1",
                  ] = "PUCT_v0",
@@ -304,15 +298,23 @@ class GymctsNeuralAgent(GymctsAgent):
             **kwargs
         )
         if score_variate not in [
-            "PUCT_v0", "PUCT_v1", "PUTC_v2",
-            "PUTC_v3", "PUTC_v4", "PUTC_v5",
-            "PUTC_v6", "PUTC_v7", "PUTC_v8",
-            "PUTC_v9", "PUTC_v10",
-            "MuZero_v0", "MuZero_v1"
+            "PUCT_v0",
+            "PUCT_v1",
+            "PUCT_v2",
+            "PUCT_v3",
+            "PUCT_v4",
+            "PUCT_v5",
+            "PUCT_v6",
+            "PUCT_v7",
+            "PUCT_v8",
+            "PUCT_v9",
+            "PUCT_v10",
+            "MuZero_v0",
+            "MuZero_v1",
         ]:
             raise ValueError(f"Invalid score_variate: {score_variate}. Must be one of: "
-                             f"PUCT_v0, PUCT_v1, PUTC_v2, PUTC_v3, PUTC_v4, PUTC_v5, "
-                             f"PUTC_v6, PUTC_v7, PUTC_v8, PUTC_v9, PUTC_v10, MuZero_v0, MuZero_v1")
+                             f"['PUCT_v0', 'PUCT_v1', 'PUCT_v2', 'PUCT_v3', 'PUCT_v4', 'PUCT_v5', "
+                             f"'PUCT_v6', 'PUCT_v7', 'PUCT_v8', 'PUCT_v9', 'PUCT_v10', 'MuZero_v0', 'MuZero_v1']")
         GymctsNeuralNode.score_variate = score_variate
         if model_kwargs is None:
@@ -336,22 +338,17 @@ class GymctsNeuralAgent(GymctsAgent):
         env = ActionMasker(env, action_mask_fn=mask_fn)
         model_kwargs = {
-            "policy": MaskableActorCriticPolicy,
-            "env": env,
-            "verbose": 1,
-        } | model_kwargs
+                           "policy": MaskableActorCriticPolicy,
+                           "env": env,
+                           "verbose": 1,
+                       } | model_kwargs
         self._model = sb3_contrib.MaskablePPO(**model_kwargs)
-    def learn(self, total_timesteps:int, **kwargs) -> None:
+    def learn(self, total_timesteps: int, **kwargs) -> None:
         """Learn from the environment using the MaskablePPO model."""
         self._model.learn(total_timesteps=total_timesteps, **kwargs)
     def expand_node(self, node: GymctsNeuralNode) -> None:
         log.debug(f"expanding node: {node}")
         # EXPANSION STRATEGY
@@ -395,7 +392,6 @@ class GymctsNeuralAgent(GymctsAgent):
             if prob == 0.0:
                 continue
             assert action in node.valid_actions, f"Action {action} is not in valid actions: {node.valid_actions}"
             obs, reward, terminal, truncated, _ = self.env.step(action)
@@ -411,9 +407,6 @@ class GymctsNeuralAgent(GymctsAgent):
         # print(f"Expanded node {node} with {len(node.children)} children.")
 if __name__ == '__main__':
     log.setLevel(20)
@@ -426,14 +419,13 @@ if __name__ == '__main__':
         "reward_function": "nasuta",
     }
     env = DisjunctiveGraphJspEnv(**env_kwargs)
     env.reset()
     env = GraphJspNeuralGYMCTSWrapper(env)
     import torch
     model_kwargs = {
         "gamma": 0.99013,
         "gae_lambda": 0.9,
@@ -467,7 +459,6 @@ if __name__ == '__main__':
     agent.learn(total_timesteps=10_000)
     agent.solve()
     actions = agent.solve(render_tree_after_step=True)
@@ -477,9 +468,3 @@ if __name__ == '__main__':
     env.render()
     makespan = env.unwrapped.get_makespan()
     print(f"makespan: {makespan}")

{gymcts-1.4.2.dist-info → gymcts-1.4.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gymcts
-Version: 1.4.2
+Version: 1.4.4
 Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
 Author: Alexander Nasuta
 Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>

{gymcts-1.4.2.dist-info → gymcts-1.4.4.dist-info}/RECORD RENAMED Viewed

@@ -5,12 +5,12 @@ gymcts/gymcts_agent.py,sha256=FzMPjHXyKN6enNJubmYEouvb0wBbE1-bpxuLuW4J1gU,10960
 gymcts/gymcts_deepcopy_wrapper.py,sha256=lCCT5-6JVCwUCP__4uPMMkT5HnO2JWm2ebzJ69zXp9c,6792
 gymcts/gymcts_distributed_agent.py,sha256=Ha9UBQvFjoErfMWvPyN0JcTYz-JaiJ4eWjLMikp9Yhs,11569
 gymcts/gymcts_env_abc.py,sha256=iqrFNNSa-kZyAGk1UN2BjkdkV6NufAkYJT8d7PlQ07E,2525
-gymcts/gymcts_neural_agent.py,sha256=FAXyskLhhsghi-m67ZgpKODHNQaWESuWSXyAQ5CRMkc,16049
+gymcts/gymcts_neural_agent.py,sha256=_PV_lNYVyZDjrPBRYK-DWiQRwUGnleAt3SKbwCZKCWU,16326
 gymcts/gymcts_node.py,sha256=KAR5y1MrT8c_7ZXwTuCj77B7DiERDfHplF8avs76JHU,13410
 gymcts/gymcts_tree_plotter.py,sha256=PR6C7q9Q4kuz1aLGyD7-aZsxk3RqlHZpOqmOiRpCyK0,3547
 gymcts/logger.py,sha256=RI7B9cvbBGrj0_QIAI77wihzuu2tPG_-z9GM2Mw5aHE,926
-gymcts-1.4.2.dist-info/licenses/LICENSE,sha256=UGe75WojDiw_77SEnK2aysEDlElRlkWie7U7NaAFx00,1072
-gymcts-1.4.2.dist-info/METADATA,sha256=iRulwz_JYvhgIqSNSsqiIzVmAFCIPABIZcu_qHocVDc,23864
-gymcts-1.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-gymcts-1.4.2.dist-info/top_level.txt,sha256=E8MoLsPimUPD0H1Y6lum4TVe-lhSDAyBAXGrkYIT52w,7
-gymcts-1.4.2.dist-info/RECORD,,
+gymcts-1.4.4.dist-info/licenses/LICENSE,sha256=UGe75WojDiw_77SEnK2aysEDlElRlkWie7U7NaAFx00,1072
+gymcts-1.4.4.dist-info/METADATA,sha256=y_-_ktxyZpaLdB0i81ggKepZNycG-P1jiqqadBMwSzI,23864
+gymcts-1.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+gymcts-1.4.4.dist-info/top_level.txt,sha256=E8MoLsPimUPD0H1Y6lum4TVe-lhSDAyBAXGrkYIT52w,7
+gymcts-1.4.4.dist-info/RECORD,,

{gymcts-1.4.2.dist-info → gymcts-1.4.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{gymcts-1.4.2.dist-info → gymcts-1.4.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{gymcts-1.4.2.dist-info → gymcts-1.4.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

gymcts 1.4.2__py3-none-any.whl → 1.4.4__py3-none-any.whl

gymcts 1.4.2__py3-none-any.whl → 1.4.4__py3-none-any.whl