gymcts 1.2.1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gymcts/gymcts_agent.py +53 -8
- gymcts/gymcts_env_abc.py +12 -2
- gymcts/gymcts_neural_agent.py +485 -0
- gymcts/gymcts_node.py +80 -9
- {gymcts-1.2.1.dist-info → gymcts-1.4.0.dist-info}/METADATA +9 -5
- gymcts-1.4.0.dist-info/RECORD +16 -0
- {gymcts-1.2.1.dist-info → gymcts-1.4.0.dist-info}/WHEEL +1 -1
- gymcts-1.2.1.dist-info/RECORD +0 -15
- {gymcts-1.2.1.dist-info → gymcts-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {gymcts-1.2.1.dist-info → gymcts-1.4.0.dist-info}/top_level.txt +0 -0
gymcts/gymcts_agent.py
CHANGED
|
@@ -2,7 +2,7 @@ import copy
|
|
|
2
2
|
import random
|
|
3
3
|
import gymnasium as gym
|
|
4
4
|
|
|
5
|
-
from typing import TypeVar, Any, SupportsFloat, Callable
|
|
5
|
+
from typing import TypeVar, Any, SupportsFloat, Callable, Literal
|
|
6
6
|
|
|
7
7
|
from gymcts.gymcts_env_abc import GymctsABC
|
|
8
8
|
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
|
|
@@ -11,7 +11,9 @@ from gymcts.gymcts_tree_plotter import _generate_mcts_tree
|
|
|
11
11
|
|
|
12
12
|
from gymcts.logger import log
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class GymctsAgent:
|
|
@@ -24,17 +26,51 @@ class GymctsAgent:
|
|
|
24
26
|
search_root_node: GymctsNode # NOTE: this is not the same as the root of the tree!
|
|
25
27
|
clear_mcts_tree_after_step: bool
|
|
26
28
|
|
|
29
|
+
|
|
30
|
+
# (num_simulations: int, step_idx: int) -> int
|
|
31
|
+
@staticmethod
|
|
32
|
+
def calc_number_of_simulations_per_step(num_simulations: int, step_idx: int) -> int:
|
|
33
|
+
"""
|
|
34
|
+
Default simulation schedule: use the same number of simulations for every step.
|
|
35
|
+
|
|
36
|
+
:param num_simulations: The configured number of simulations; returned unchanged.
|
|
37
|
+
:param step_idx: The current step index (ignored by this default schedule).
|
|
38
|
+
:return: The number of simulations to run for the given step, i.e. ``num_simulations``.
|
|
39
|
+
"""
|
|
40
|
+
return num_simulations
|
|
41
|
+
|
|
27
42
|
def __init__(self,
|
|
28
43
|
env: GymctsABC,
|
|
29
44
|
clear_mcts_tree_after_step: bool = True,
|
|
30
45
|
render_tree_after_step: bool = False,
|
|
31
46
|
render_tree_max_depth: int = 2,
|
|
32
47
|
number_of_simulations_per_step: int = 25,
|
|
33
|
-
exclude_unvisited_nodes_from_render: bool = False
|
|
48
|
+
exclude_unvisited_nodes_from_render: bool = False,
|
|
49
|
+
calc_number_of_simulations_per_step: Callable[[int,int], int] = None,
|
|
50
|
+
score_variate: Literal["UCT_v0", "UCT_v1", "UCT_v2",] = "UCT_v0",
|
|
51
|
+
best_action_weight=None,
|
|
52
|
+
keep_whole_tree_till_initial_root: bool = False,
|
|
34
53
|
):
|
|
35
54
|
# check if action space of env is discrete
|
|
36
55
|
if not isinstance(env.action_space, gym.spaces.Discrete):
|
|
37
56
|
raise ValueError("Action space must be discrete.")
|
|
57
|
+
if calc_number_of_simulations_per_step is not None:
|
|
58
|
+
# check if the provided function is callable
|
|
59
|
+
if not callable(calc_number_of_simulations_per_step):
|
|
60
|
+
raise ValueError("calc_number_of_simulations_per_step must be a callable accepting two arguments: num_simulations and step_idx.")
|
|
61
|
+
# assign the provided function to the attribute
|
|
62
|
+
# it needs to be staticmethod to be used as a class attribute
|
|
63
|
+
print("Using provided calc_number_of_simulations_per_step function.")
|
|
64
|
+
self.calc_number_of_simulations_per_step = staticmethod(calc_number_of_simulations_per_step)
|
|
65
|
+
if score_variate not in ["UCT_v0", "UCT_v1", "UCT_v2"]:
|
|
66
|
+
raise ValueError("score_variate must be one of ['UCT_v0', 'UCT_v1', 'UCT_v2'].")
|
|
67
|
+
GymctsNode.score_variate = score_variate
|
|
68
|
+
|
|
69
|
+
if best_action_weight is not None:
|
|
70
|
+
if best_action_weight < 0 or best_action_weight > 1:
|
|
71
|
+
raise ValueError("best_action_weight must be in range [0, 1].")
|
|
72
|
+
GymctsNode.best_action_weight = best_action_weight
|
|
73
|
+
|
|
38
74
|
|
|
39
75
|
self.render_tree_after_step = render_tree_after_step
|
|
40
76
|
self.exclude_unvisited_nodes_from_render = exclude_unvisited_nodes_from_render
|
|
@@ -44,6 +80,7 @@ class GymctsAgent:
|
|
|
44
80
|
|
|
45
81
|
self.env = env
|
|
46
82
|
self.clear_mcts_tree_after_step = clear_mcts_tree_after_step
|
|
83
|
+
self.keep_whole_tree_till_initial_root = keep_whole_tree_till_initial_root
|
|
47
84
|
|
|
48
85
|
self.search_root_node = GymctsNode(
|
|
49
86
|
action=None,
|
|
@@ -65,8 +102,8 @@ class GymctsAgent:
|
|
|
65
102
|
# select child with highest UCB score
|
|
66
103
|
while not temp_node.is_leaf():
|
|
67
104
|
children = list(temp_node.children.values())
|
|
68
|
-
max_ucb_score = max(child.
|
|
69
|
-
best_children = [child for child in children if child.
|
|
105
|
+
max_ucb_score = max(child.tree_policy_score() for child in children)
|
|
106
|
+
best_children = [child for child in children if child.tree_policy_score() == max_ucb_score]
|
|
70
107
|
temp_node = random.choice(best_children)
|
|
71
108
|
log.debug(f"Selected leaf node: {temp_node}")
|
|
72
109
|
return temp_node
|
|
@@ -88,7 +125,6 @@ class GymctsAgent:
|
|
|
88
125
|
parent=node,
|
|
89
126
|
env_reference=self.env,
|
|
90
127
|
)
|
|
91
|
-
|
|
92
128
|
node.children = child_dict
|
|
93
129
|
|
|
94
130
|
def solve(self, num_simulations_per_step: int = None, render_tree_after_step: bool = None) -> list[int]:
|
|
@@ -104,13 +140,20 @@ class GymctsAgent:
|
|
|
104
140
|
|
|
105
141
|
action_list = []
|
|
106
142
|
|
|
143
|
+
idx = 0
|
|
107
144
|
while not current_node.terminal:
|
|
108
|
-
|
|
145
|
+
num_sims = self.calc_number_of_simulations_per_step(num_simulations_per_step, idx)
|
|
146
|
+
|
|
147
|
+
log.info(f"Performing MCTS step {idx} with {num_sims} simulations.")
|
|
148
|
+
|
|
149
|
+
next_action, current_node = self.perform_mcts_step(num_simulations=num_sims,
|
|
109
150
|
render_tree_after_step=render_tree_after_step)
|
|
110
|
-
log.info(f"selected action {next_action} after {
|
|
151
|
+
log.info(f"selected action {next_action} after {num_sims} simulations.")
|
|
111
152
|
action_list.append(next_action)
|
|
112
153
|
log.info(f"current action list: {action_list}")
|
|
113
154
|
|
|
155
|
+
idx += 1
|
|
156
|
+
|
|
114
157
|
log.info(f"Final action list: {action_list}")
|
|
115
158
|
# restore state of current node
|
|
116
159
|
return action_list
|
|
@@ -149,6 +192,8 @@ class GymctsAgent:
|
|
|
149
192
|
# we also need to reset the children of the current node
|
|
150
193
|
# this is done by calling the reset method
|
|
151
194
|
next_node.reset()
|
|
195
|
+
elif not self.keep_whole_tree_till_initial_root:
|
|
196
|
+
next_node.remove_parent()
|
|
152
197
|
|
|
153
198
|
self.search_root_node = next_node
|
|
154
199
|
|
gymcts/gymcts_env_abc.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
from typing import TypeVar, Any, SupportsFloat, Callable
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
3
|
import gymnasium as gym
|
|
4
|
-
|
|
5
|
-
TSoloMCTSNode = TypeVar("TSoloMCTSNode", bound="SoloMCTSNode")
|
|
4
|
+
import numpy as np
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
class GymctsABC(ABC, gym.Env):
|
|
@@ -47,6 +46,17 @@ class GymctsABC(ABC, gym.Env):
|
|
|
47
46
|
"""
|
|
48
47
|
pass
|
|
49
48
|
|
|
49
|
+
@abstractmethod
|
|
50
|
+
def action_masks(self) -> np.ndarray | None:
|
|
51
|
+
"""
|
|
52
|
+
Returns a numpy array of action masks for the environment. The array should have the same length as the number
|
|
53
|
+
of actions in the action space. If an action is valid, the corresponding mask value should be 1, otherwise 0.
|
|
54
|
+
If no action mask is available, it should return None. Subclasses must implement this method.
|
|
55
|
+
|
|
56
|
+
:return: a numpy array of action masks (1 = valid action, 0 = invalid action), or None if no mask is available
|
|
57
|
+
"""
|
|
58
|
+
pass
|
|
59
|
+
|
|
50
60
|
@abstractmethod
|
|
51
61
|
def rollout(self) -> float:
|
|
52
62
|
"""
|
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import sys
|
|
3
|
+
from typing import Any, Literal
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
import math
|
|
7
|
+
import sb3_contrib
|
|
8
|
+
|
|
9
|
+
import gymnasium as gym
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
|
|
13
|
+
from jsp_instance_utils.instances import ft06, ft06_makespan
|
|
14
|
+
from sb3_contrib.common.maskable.distributions import MaskableCategoricalDistribution
|
|
15
|
+
from sb3_contrib.common.maskable.policies import MaskableActorCriticPolicy
|
|
16
|
+
from sb3_contrib.common.wrappers import ActionMasker
|
|
17
|
+
|
|
18
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
19
|
+
from gymcts.gymcts_env_abc import GymctsABC
|
|
20
|
+
from gymcts.gymcts_node import GymctsNode
|
|
21
|
+
|
|
22
|
+
from gymcts.logger import log
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GraphJspNeuralGYMCTSWrapper(GymctsABC, gym.Wrapper):
    """Adapter exposing a ``DisjunctiveGraphJspEnv`` through the ``GymctsABC`` interface.

    A state is represented by the action history of the wrapped environment;
    restoring a state resets the environment and replays that history.

    All methods operate on ``self.env`` (the wrapped environment) and never on
    a module-level ``env`` variable, so the wrapper works regardless of how the
    surrounding script names its variables.
    """

    def __init__(self, env: DisjunctiveGraphJspEnv):
        """
        :param env: the job shop environment to wrap.
        """
        gym.Wrapper.__init__(self, env)

    def load_state(self, state: Any) -> None:
        """
        Restore a state by resetting the environment and replaying its actions.

        :param state: an iterable of actions, as returned by ``get_state``.
        """
        self.env.reset()
        for action in state:
            self.env.step(action)

    def is_terminal(self) -> bool:
        """
        :return: whatever the unwrapped environment reports via ``is_terminal()``.
        """
        return self.env.unwrapped.is_terminal()

    def get_valid_actions(self) -> list[int]:
        """
        :return: the valid actions of the unwrapped environment as a list.
        """
        return list(self.env.unwrapped.valid_actions())

    def rollout(self) -> float:
        """
        Play uniformly random valid actions until the episode terminates.

        If the environment is already terminal, the value is
        ``-makespan / scaling_divisor + 2``. Otherwise the value is the reward
        of the last step taken, shifted by ``+ 2``.

        :return: the rollout value of the current state.
        """
        terminal = self.is_terminal()

        if terminal:
            lower_bound = self.env.unwrapped.reward_function_parameters['scaling_divisor']
            return - self.env.unwrapped.get_makespan() / lower_bound + 2

        reward = 0
        while not terminal:
            action = random.choice(self.get_valid_actions())
            # Only the reward of the final step is kept; intermediate rewards are overwritten.
            _obs, reward, terminal, _truncated, _info = self.env.step(action)

        return reward + 2

    def get_state(self) -> Any:
        """
        :return: the action history of the unwrapped environment.
        """
        return self.env.unwrapped.get_action_history()

    def action_masks(self) -> np.ndarray | None:
        """Return the action mask for the current state."""
        return self.env.unwrapped.valid_action_mask()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class GymctsNeuralNode(GymctsNode):
    """
    MCTS node whose tree policy weights exploration by a prior P(s, a).

    The tree policy score is ``exploitation_term + exploration_term`` where

        exploitation_term = 0                                   if N(s,a) == 0
                            (1 - b) * mean_value + b * max_value otherwise

    and the exploration term depends on ``GymctsNeuralNode.score_variate``.

    PUCT (Predictor + UCT) exploration terms:

        PUCT_v0:
            c * P(s, a) * √( N(s) ) / (1 + N(s,a))

        PUCT_v1:
            c * P(s, a) * √( 2 * ln(N(s)) / N(s,a) )        (score is +inf if N(s,a) == 0)

        PUCT_v2:
            c * P(s, a) * √( N(s) ) / N(s,a)                (score is +inf if N(s,a) == 0)

        PUCT_v3:
            c * P(s, a)^μ * √( N(s) / (1 + N(s,a)) )

        PUCT_v4:
            c * ( P(s, a) / (1 + N(s,a)) )

        PUCT_v5:
            c * P(s, a) * ( √(N(s)) + 1 ) / (N(s,a) + 1)

        PUCT_v6:
            c * P(s, a) * N(s) / (1 + N(s,a))

        PUCT_v7:
            c * P(s, a) * ( √(N(s)) + ε ) / (N(s,a) + 1)

        PUCT_v8:
            c * P(s, a) * √( (ln(N(s)) + 1) / (1 + N(s,a)) )

        PUCT_v9:
            c * P(s, a) * √( N(s) / (1 + N(s,a)) )

        PUCT_v10:
            c * P(s, a) * √( ln(N(s)) / (1 + N(s,a)) )

    MuZero exploration terms (both variants currently use the same formula):

        MuZero_v0, MuZero_v1:
            c * P(s, a) * √( N(s) ) / (1 + N(s,a)) * [ c₁ + ln( (N(s) + c₂ + 1) / c₂ ) ]

    Where:
        - N(s): visit count of the parent node
        - N(s,a): visit count of this node
        - P(s,a): prior selection score of this node
        - b: GymctsNode.best_action_weight
        - c: GymctsNode.ubc_c; c₁, c₂: MuZero_c1, MuZero_c2
        - μ: PUCT_v3_mu, exponent applied to P(s,a) in PUCT_v3
        - ε: 1e-8, small constant added in PUCT_v7
    """

    PUCT_v3_mu = 0.95

    MuZero_c1 = 1.25
    MuZero_c2 = 19652.0

    score_variate: Literal[
        "PUCT_v0",
        "PUCT_v1",
        "PUCT_v2",
        "PUCT_v3",
        "PUCT_v4",
        "PUCT_v5",
        "PUCT_v6",
        "PUCT_v7",
        "PUCT_v8",
        "PUCT_v9",
        "PUCT_v10",
        "MuZero_v0",
        "MuZero_v1",
    ] = "PUCT_v0"

    def __init__(
            self,
            action: int | None,
            parent: 'GymctsNeuralNode | None',
            env_reference: GymctsABC,
            prior_selection_score: float,
            observation: np.ndarray | None = None,
    ):
        """
        :param action: the action that leads from the parent to this node (None for a root node).
        :param parent: the parent node (None for a root node).
        :param env_reference: the environment, forwarded to ``GymctsNode.__init__``.
        :param prior_selection_score: the prior P(s, a) used in the exploration term.
        :param observation: the observation associated with this node, if any.
        """
        super().__init__(action, parent, env_reference)

        self._obs = observation
        self._selection_score_prior = prior_selection_score

    def tree_policy_score(self) -> float:
        """
        Compute the tree policy score of this node for the configured ``score_variate``.

        The historical misspelling ``"PUTC_*"`` is accepted and treated as ``"PUCT_*"``.

        :return: exploitation term plus exploration term (see class docstring);
                 ``float("inf")`` for unvisited nodes under PUCT_v1 and PUCT_v2.
        :raises ValueError: if called on a node without parent or if the score variate is unknown.
        """
        if self.parent is None:
            raise ValueError("tree_policy_score can only be called on non-root nodes")

        c = GymctsNode.ubc_c
        p_sa = self._selection_score_prior
        n_s = self.parent.visit_count
        n_sa = self.visit_count

        assert 0 <= GymctsNode.best_action_weight <= 1
        b = GymctsNode.best_action_weight
        exploitation_term = 0.0 if n_sa == 0 else (1 - b) * self.mean_value + b * self.max_value

        # Older releases spelled most variants "PUTC_*"; normalise so both spellings work.
        variate = str(GymctsNeuralNode.score_variate).replace("PUTC_", "PUCT_")

        # These two variants divide by N(s,a); an unvisited child is explored first
        # instead of raising ZeroDivisionError.
        if variate in ("PUCT_v1", "PUCT_v2") and n_sa == 0:
            return float("inf")

        if variate == "PUCT_v0":
            exploration_term = c * p_sa * math.sqrt(n_s) / (1 + n_sa)
        elif variate == "PUCT_v1":
            exploration_term = c * p_sa * math.sqrt(2 * math.log(n_s) / n_sa)
        elif variate == "PUCT_v2":
            exploration_term = c * p_sa * math.sqrt(n_s) / n_sa
        elif variate == "PUCT_v3":
            exploration_term = c * (p_sa ** GymctsNeuralNode.PUCT_v3_mu) * math.sqrt(n_s / (1 + n_sa))
        elif variate == "PUCT_v4":
            exploration_term = c * (p_sa / (1 + n_sa))
        elif variate == "PUCT_v5":
            exploration_term = c * p_sa * (math.sqrt(n_s) + 1) / (n_sa + 1)
        elif variate == "PUCT_v6":
            exploration_term = c * p_sa * n_s / (1 + n_sa)
        elif variate == "PUCT_v7":
            epsilon = 1e-8
            exploration_term = c * p_sa * (math.sqrt(n_s) + epsilon) / (n_sa + 1)
        elif variate == "PUCT_v8":
            exploration_term = c * p_sa * math.sqrt((math.log(n_s) + 1) / (1 + n_sa))
        elif variate == "PUCT_v9":
            exploration_term = c * p_sa * math.sqrt(n_s / (1 + n_sa))
        elif variate == "PUCT_v10":
            exploration_term = c * p_sa * math.sqrt(math.log(n_s) / (1 + n_sa))
        elif variate in ("MuZero_v0", "MuZero_v1"):
            c1 = GymctsNeuralNode.MuZero_c1
            c2 = GymctsNeuralNode.MuZero_c2
            exploration_term = c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
        else:
            raise ValueError(f"unknown score variate: {GymctsNeuralNode.score_variate}. ")

        return exploitation_term + exploration_term

    def get_best_action(self) -> int:
        """
        Returns the best action of the node, i.e. the action of the child with the highest ``max_value``.

        :return: the best action of the node.
        """
        return max(self.children.values(), key=lambda child: child.max_value).action

    def __str__(self, colored=False, action_space_n=None) -> str:
        """
        Returns a string representation of the node. The string representation is used for visualisation purposes.
        It is used for example in the mcts tree visualisation functionality.

        :param colored: true if the string representation should be colored, false otherwise. (true is used by the mcts tree visualisation)
        :param action_space_n: the number of actions in the action space. This is used for coloring the action in the string representation.
        :return: a potentially colored string representation of the node.
        :raises ValueError: if ``colored`` is true for a non-root node and ``action_space_n`` is None.
        """
        if not colored:

            if not self.is_root():
                return f"(a={self.action}, N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}, ubc={self.tree_policy_score():.2f})"
            else:
                return f"(N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}) [root]"

        import gymcts.colorful_console_utils as ccu

        if self.is_root():
            return f"({ccu.CYELLOW}N{ccu.CEND}={self.visit_count}, {ccu.CYELLOW}Q_v{ccu.CEND}={self.mean_value:.2f}, {ccu.CYELLOW}best{ccu.CEND}={self.max_value:.2f})"

        if action_space_n is None:
            raise ValueError("action_space_n must be provided if colored is True")

        p = ccu.CYELLOW
        e = ccu.CEND

        def colorful_value(value: float | int | None) -> str:
            """Wrap a number in a console color: red for 0, grey for ±inf/None, cyan otherwise."""
            if value is None:
                return f"{ccu.CGREY}None{e}"
            color = ccu.CCYAN
            if value == 0:
                color = ccu.CRED
            if value == float("inf"):
                color = ccu.CGREY
            if value == -float("inf"):
                color = ccu.CGREY

            if isinstance(value, float):
                return f"{color}{value:.2f}{e}"

            # ints and any other value type are rendered with their default formatting
            return f"{color}{value}{e}"

        root_node = self.get_root()
        mean_val = f"{self.mean_value:.2f}"

        return ((f"("
                 f"{p}a{e}={ccu.wrap_evenly_spaced_color(s=self.action, n_of_item=self.action, n_classes=action_space_n)}, "
                 f"{p}N{e}={colorful_value(self.visit_count)}, "
                 f"{p}Q_v{e}={ccu.wrap_with_color_scale(s=mean_val, value=self.mean_value, min_val=root_node.min_value, max_val=root_node.max_value)}, "
                 f"{p}best{e}={colorful_value(self.max_value)}") +
                (f", {p}{GymctsNeuralNode.score_variate}{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class GymctsNeuralAgent(GymctsAgent):
    """
    MCTS agent that expands nodes with priors taken from a ``MaskablePPO`` policy.

    The agent owns a ``sb3_contrib.MaskablePPO`` model built on the (action-masked)
    environment. ``learn`` trains that model; ``expand_node`` queries the policy's
    action distribution and stores the probability of each action as the prior of
    the corresponding child node.
    """

    # Correctly spelled names of all supported score variates.
    _SCORE_VARIATES = (
        "PUCT_v0", "PUCT_v1", "PUCT_v2",
        "PUCT_v3", "PUCT_v4", "PUCT_v5",
        "PUCT_v6", "PUCT_v7", "PUCT_v8",
        "PUCT_v9", "PUCT_v10",
        "MuZero_v0", "MuZero_v1",
    )

    def __init__(self,
                 env: GymctsABC,
                 *args,
                 model_kwargs=None,
                 score_variate: Literal[
                     "PUCT_v0",
                     "PUCT_v1",
                     "PUCT_v2",
                     "PUCT_v3",
                     "PUCT_v4",
                     "PUCT_v5",
                     "PUCT_v6",
                     "PUCT_v7",
                     "PUCT_v8",
                     "PUCT_v9",
                     "PUCT_v10",
                     "MuZero_v0",
                     "MuZero_v1",
                 ] = "PUCT_v0",
                 **kwargs
                 ):
        """
        :param env: the environment to plan on; it is reset during construction.
        :param args: positional arguments forwarded to ``GymctsAgent.__init__`` after ``env``.
        :param model_kwargs: keyword arguments for ``sb3_contrib.MaskablePPO``; they override the
            defaults ``policy=MaskableActorCriticPolicy``, ``env=<masked env>`` and ``verbose=1``.
        :param score_variate: the tree policy variant of ``GymctsNeuralNode``. The historical
            misspelling ``"PUTC_*"`` is still accepted and mapped to ``"PUCT_*"``.
        :param kwargs: keyword arguments forwarded to ``GymctsAgent.__init__``.
        :raises ValueError: if ``score_variate`` is not a supported variant.
        """
        # env is passed positionally so that additional positional arguments can follow it;
        # passing it as a keyword in front of *args raised "multiple values for argument 'env'".
        super().__init__(
            env,
            *args,
            **kwargs
        )

        normalized_variate = score_variate
        if isinstance(score_variate, str):
            normalized_variate = score_variate.replace("PUTC_", "PUCT_")
        if normalized_variate not in GymctsNeuralAgent._SCORE_VARIATES:
            raise ValueError(f"Invalid score_variate: {score_variate}. Must be one of: "
                             f"{', '.join(GymctsNeuralAgent._SCORE_VARIATES)}")
        GymctsNeuralNode.score_variate = normalized_variate

        if model_kwargs is None:
            model_kwargs = {}
        obs, info = env.reset()

        # Replace the root created by the base class with a neural node carrying the observation.
        self.search_root_node = GymctsNeuralNode(
            action=None,
            parent=None,
            env_reference=env,
            observation=obs,
            prior_selection_score=1.0,
        )

        def mask_fn(masked_env: gym.Env) -> np.ndarray:
            """Return the env's action mask, or an all-ones mask if the env provides none."""
            mask = masked_env.action_masks()
            if mask is None:
                mask = np.ones(masked_env.action_space.n, dtype=np.float32)
            return mask

        env = ActionMasker(env, action_mask_fn=mask_fn)

        # user supplied model_kwargs take precedence over the defaults
        model_kwargs = {
            "policy": MaskableActorCriticPolicy,
            "env": env,
            "verbose": 1,
        } | model_kwargs

        self._model = sb3_contrib.MaskablePPO(**model_kwargs)

    def learn(self, total_timesteps: int, **kwargs) -> None:
        """Learn from the environment using the MaskablePPO model."""
        self._model.learn(total_timesteps=total_timesteps, **kwargs)

    def expand_node(self, node: GymctsNeuralNode) -> None:
        """
        Expand ``node`` by creating one child per action with non-zero policy probability.

        The policy's (masked) action distribution for the node's observation is computed once;
        each action's probability becomes the prior selection score of its child.

        :param node: the node to expand; its ``children`` attribute is replaced.
        """
        log.debug(f"expanding node: {node}")
        # EXPANSION STRATEGY
        # expand all children that have a non-zero probability under the policy

        child_dict = {}

        self._load_state(node)

        obs_tensor, _ = self._model.policy.obs_to_tensor(np.array([node._obs]))
        action_masks = np.array([self.env.action_masks()])
        distribution = self._model.policy.get_distribution(obs=obs_tensor, action_masks=action_masks)
        action_probabilities = distribution.distribution.probs[0]

        for action, prob in enumerate(action_probabilities):
            # every child is created from the state of the node that is being expanded
            self._load_state(node)

            log.debug(f"Probabily for action {action}: {prob}")

            if prob == 0.0:
                continue

            assert action in node.valid_actions, f"Action {action} is not in valid actions: {node.valid_actions}"

            obs, _reward, _terminal, _truncated, _info = self.env.step(action)
            child_dict[action] = GymctsNeuralNode(
                action=action,
                parent=node,
                observation=copy.deepcopy(obs),
                env_reference=self.env,
                prior_selection_score=float(prob)
            )

        node.children = child_dict
|
|
411
|
+
# print(f"Expanded node {node} with {len(node.children)} children.")
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
if __name__ == '__main__':
    # Demo: train a MaskablePPO prior on the ft06 job shop instance, then solve it with neural MCTS.
    log.setLevel(20)  # 20 is the numeric value of the INFO log level

    env_kwargs = dict(
        jps_instance=ft06,
        default_visualisations=["gantt_console", "graph_console"],
        reward_function_parameters=dict(scaling_divisor=ft06_makespan),
        reward_function="nasuta",
    )

    jsp_env = DisjunctiveGraphJspEnv(**env_kwargs)
    jsp_env.reset()

    # NOTE: the module-level name ``env`` must refer to the wrapped environment.
    env = GraphJspNeuralGYMCTSWrapper(jsp_env)

    import torch

    policy_kwargs = dict(
        net_arch=dict(pi=[90, 90], vf=[90, 90]),
        ortho_init=True,
        activation_fn=torch.nn.ELU,
        optimizer_kwargs=dict(eps=1e-7),
    )
    model_kwargs = dict(
        gamma=0.99013,
        gae_lambda=0.9,
        normalize_advantage=True,
        n_epochs=28,
        n_steps=432,
        max_grad_norm=0.5,
        learning_rate=6e-4,
        policy_kwargs=policy_kwargs,
    )

    agent = GymctsNeuralAgent(
        env=env,
        render_tree_after_step=True,
        render_tree_max_depth=3,
        exclude_unvisited_nodes_from_render=False,
        number_of_simulations_per_step=15,
        # clear_mcts_tree_after_step = False,
        model_kwargs=model_kwargs,
    )

    agent.learn(total_timesteps=10_000)

    agent.solve()

    # replay the actions found by the second solve run on the environment
    actions = agent.solve(render_tree_after_step=True)
    for action in actions:
        env.step(action)

    env.render()
    print(f"makespan: {env.unwrapped.get_makespan()}")
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
|
gymcts/gymcts_node.py
CHANGED
|
@@ -2,7 +2,7 @@ import uuid
|
|
|
2
2
|
import random
|
|
3
3
|
import math
|
|
4
4
|
|
|
5
|
-
from typing import TypeVar, Any, SupportsFloat, Callable, Generator
|
|
5
|
+
from typing import TypeVar, Any, SupportsFloat, Callable, Generator, Literal
|
|
6
6
|
|
|
7
7
|
from gymcts.gymcts_env_abc import GymctsABC
|
|
8
8
|
|
|
@@ -16,6 +16,25 @@ class GymctsNode:
|
|
|
16
16
|
best_action_weight: float = 0.05 # weight for the best action
|
|
17
17
|
ubc_c = 0.707 # exploration coefficient
|
|
18
18
|
|
|
19
|
+
"""
|
|
20
|
+
UCT (Upper Confidence Bound applied to Trees) exploration terms:
|
|
21
|
+
|
|
22
|
+
UCT 0:
|
|
23
|
+
c * √( 2 * ln(N(s)) / N(s,a) )
|
|
24
|
+
|
|
25
|
+
UCT 1:
|
|
26
|
+
c * √( ln(N(s)) / (1 + N(s,a)) )
|
|
27
|
+
|
|
28
|
+
UCT 2:
|
|
29
|
+
c * ( √(N(s)) / (1 + N(s,a)) )
|
|
30
|
+
|
|
31
|
+
Where:
|
|
32
|
+
N(s) = number of times state s has been visited
|
|
33
|
+
N(s,a) = number of times action a was taken from state s
|
|
34
|
+
c = exploration constant
|
|
35
|
+
"""
|
|
36
|
+
score_variate: Literal["UCT_v0", "UCT_v1", "UCT_v2",] = "UCT_v0"
|
|
37
|
+
|
|
19
38
|
|
|
20
39
|
|
|
21
40
|
# attributes
|
|
@@ -42,7 +61,7 @@ class GymctsNode:
|
|
|
42
61
|
if not colored:
|
|
43
62
|
|
|
44
63
|
if not self.is_root():
|
|
45
|
-
return f"(a={self.action}, N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}, ubc={self.
|
|
64
|
+
return f"(a={self.action}, N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}, ubc={self.tree_policy_score():.2f})"
|
|
46
65
|
else:
|
|
47
66
|
return f"(N={self.visit_count}, Q_v={self.mean_value:.2f}, best={self.max_value:.2f}) [root]"
|
|
48
67
|
|
|
@@ -83,7 +102,7 @@ class GymctsNode:
|
|
|
83
102
|
f"{p}N{e}={colorful_value(self.visit_count)}, "
|
|
84
103
|
f"{p}Q_v{e}={ccu.wrap_with_color_scale(s=mean_val, value=self.mean_value, min_val=root_node.min_value, max_val=root_node.max_value)}, "
|
|
85
104
|
f"{p}best{e}={colorful_value(self.max_value)}") +
|
|
86
|
-
(f", {p}ubc{e}={colorful_value(self.
|
|
105
|
+
(f", {p}ubc{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))
|
|
87
106
|
|
|
88
107
|
def traverse_nodes(self) -> Generator[TGymctsNode, None, None]:
|
|
89
108
|
"""
|
|
@@ -192,6 +211,12 @@ class GymctsNode:
|
|
|
192
211
|
if self.parent:
|
|
193
212
|
self.parent.reset()
|
|
194
213
|
|
|
214
|
+
def remove_parent(self) -> None:
    """
    Detach this node and every ancestor from its parent.

    After the call, ``parent`` is None on this node and on each node that was
    reachable from it through ``parent`` links, so the part of the tree above
    this node can no longer be reached from it.

    The previous implementation cleared ``self.parent`` before checking it, so the
    ancestors were never visited. The chain is walked iteratively, remembering each
    parent before its link is cleared.
    """
    node = self
    while node is not None:
        former_parent = node.parent
        node.parent = None
        node = former_parent
|
|
219
|
+
|
|
195
220
|
def is_root(self) -> bool:
|
|
196
221
|
"""
|
|
197
222
|
Returns true if the node is a root node. A root node is a node that has no parent.
|
|
@@ -252,9 +277,39 @@ class GymctsNode:
|
|
|
252
277
|
"""
|
|
253
278
|
return self.max_value
|
|
254
279
|
|
|
255
|
-
def
|
|
280
|
+
def tree_policy_score(self):
|
|
256
281
|
"""
|
|
282
|
+
TODO: update docstring
|
|
283
|
+
|
|
257
284
|
The score for an action that would transition between the parent and child.
|
|
285
|
+
For vanilla MCTS, this is the UCB1 score.
|
|
286
|
+
|
|
287
|
+
The UCB1 score is calculated using the formula:
|
|
288
|
+
|
|
289
|
+
UCT (Upper Confidence Bound applied to Trees) exploration terms:
|
|
290
|
+
|
|
291
|
+
UCT_v0:
|
|
292
|
+
c * √( 2 * ln(N(s)) / N(s,a) )
|
|
293
|
+
|
|
294
|
+
UCT_v1:
|
|
295
|
+
c * √( ln(N(s)) / (1 + N(s,a)) )
|
|
296
|
+
|
|
297
|
+
UCT_v2:
|
|
298
|
+
c * ( √(N(s)) / (1 + N(s,a)) )
|
|
299
|
+
|
|
300
|
+
Where:
|
|
301
|
+
N(s) = number of times state s has been visited
|
|
302
|
+
N(s,a) = number of times action a was taken from state s
|
|
303
|
+
c = exploration constant
|
|
304
|
+
|
|
305
|
+
where:
|
|
306
|
+
- mean_value is the mean value of the node
|
|
307
|
+
- c is a constant that controls the exploration-exploitation trade-off (GymctsNode.ubc_c)
|
|
308
|
+
- parent_visit_count is the number of times the parent node has been visited
|
|
309
|
+
- visit_count is the number of times the node has been visited
|
|
310
|
+
|
|
311
|
+
If the node has not been visited yet, the score is set to infinity.
|
|
312
|
+
|
|
258
313
|
prior_score = child.prior * math.sqrt(parent.visit_count) / (child.visit_count + 1)
|
|
259
314
|
|
|
260
315
|
if child.visit_count > 0:
|
|
@@ -269,8 +324,24 @@ class GymctsNode:
|
|
|
269
324
|
"""
|
|
270
325
|
if self.is_root():
|
|
271
326
|
raise ValueError("ucb_score can only be called on non-root nodes")
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
327
|
+
c = GymctsNode.ubc_c # default is 0.707
|
|
328
|
+
|
|
329
|
+
assert 0 <= GymctsNode.best_action_weight <= 1
|
|
330
|
+
b = GymctsNode.best_action_weight
|
|
331
|
+
exploitation_term = 0.0 if self.visit_count == 0 else (1 - b) * self.mean_value + b * self.max_value
|
|
332
|
+
|
|
333
|
+
if GymctsNode.score_variate == "UCT_v0":
|
|
334
|
+
if self.visit_count == 0:
|
|
335
|
+
return float("inf")
|
|
336
|
+
return exploitation_term + c * math.sqrt( 2 * math.log(self.parent.visit_count) / (self.visit_count))
|
|
337
|
+
|
|
338
|
+
if GymctsNode.score_variate == "UCT_v1":
|
|
339
|
+
return exploitation_term + c * math.sqrt( math.log(self.parent.visit_count) / (1 + self.visit_count))
|
|
340
|
+
|
|
341
|
+
if GymctsNode.score_variate == "UCT_v2":
|
|
342
|
+
return exploitation_term + c * math.sqrt(self.parent.visit_count) / (1 + self.visit_count)
|
|
343
|
+
|
|
344
|
+
raise ValueError(f"unknown score variate: {GymctsNode.score_variate}. ")
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gymcts
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems formulated as Gymnasium reinforcement learning environments.
|
|
5
5
|
Author: Alexander Nasuta
|
|
6
6
|
Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>
|
|
@@ -70,11 +70,18 @@ Requires-Dist: jupyter; extra == "dev"
|
|
|
70
70
|
Requires-Dist: typing_extensions>=4.12.0; extra == "dev"
|
|
71
71
|
Dynamic: license-file
|
|
72
72
|
|
|
73
|
-
|
|
73
|
+
[](https://doi.org/10.5281/zenodo.15283390)
|
|
74
|
+
[](https://www.python.org/downloads/)
|
|
75
|
+
[](https://pypi.org/project/gymcts/)
|
|
76
|
+
[](https://github.com/Alexander-Nasuta/gymcts/blob/master/LICENSE)
|
|
77
|
+
[](https://gymcts.readthedocs.io/en/latest/?badge=latest)
|
|
78
|
+
|
|
79
|
+
# GYMCTS
|
|
74
80
|
|
|
75
81
|
A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.
|
|
76
82
|
|
|
77
83
|
- GitHub: [GYMCTS on GitHub](https://github.com/Alexander-Nasuta/gymcts)
|
|
84
|
+
- GitLab: [GYMCTS on GitLab](https://git-ce.rwth-aachen.de/alexander.nasuta/gymcts)
|
|
78
85
|
- PyPI: [GYMCTS on PyPI](https://pypi.org/project/gymcts/)
|
|
79
86
|
- Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)
|
|
80
87
|
|
|
@@ -579,9 +586,6 @@ This project uses `pytest` for testing. To run the tests, run the following comm
|
|
|
579
586
|
```shell
|
|
580
587
|
pytest
|
|
581
588
|
```
|
|
582
|
-
Here is a screenshot of what the output might look like:
|
|
583
|
-
|
|
584
|
-

|
|
585
589
|
|
|
586
590
|
For testing with `tox` run the following command:
|
|
587
591
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
gymcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
gymcts/colorful_console_utils.py,sha256=n7nymC8kKZnA_8nXcdn201NAzjZjgEHfKpbBcnl4oAE,5891
|
|
3
|
+
gymcts/gymcts_action_history_wrapper.py,sha256=7-p17Fgb80SRCBaCm6G8SJrEPsl2Y4aIO3InviuQP08,6993
|
|
4
|
+
gymcts/gymcts_agent.py,sha256=FzMPjHXyKN6enNJubmYEouvb0wBbE1-bpxuLuW4J1gU,10960
|
|
5
|
+
gymcts/gymcts_deepcopy_wrapper.py,sha256=lCCT5-6JVCwUCP__4uPMMkT5HnO2JWm2ebzJ69zXp9c,6792
|
|
6
|
+
gymcts/gymcts_distributed_agent.py,sha256=Ha9UBQvFjoErfMWvPyN0JcTYz-JaiJ4eWjLMikp9Yhs,11569
|
|
7
|
+
gymcts/gymcts_env_abc.py,sha256=iqrFNNSa-kZyAGk1UN2BjkdkV6NufAkYJT8d7PlQ07E,2525
|
|
8
|
+
gymcts/gymcts_neural_agent.py,sha256=0udwrqUMxure4TO6bJWmy8wyTNFC-FM3U1CK7iosgaM,16020
|
|
9
|
+
gymcts/gymcts_node.py,sha256=m6Wmv1VTr6jadOf7XJMehIjjcIOnvAe0RnYKEeP_SlQ,13364
|
|
10
|
+
gymcts/gymcts_tree_plotter.py,sha256=PR6C7q9Q4kuz1aLGyD7-aZsxk3RqlHZpOqmOiRpCyK0,3547
|
|
11
|
+
gymcts/logger.py,sha256=RI7B9cvbBGrj0_QIAI77wihzuu2tPG_-z9GM2Mw5aHE,926
|
|
12
|
+
gymcts-1.4.0.dist-info/licenses/LICENSE,sha256=UGe75WojDiw_77SEnK2aysEDlElRlkWie7U7NaAFx00,1072
|
|
13
|
+
gymcts-1.4.0.dist-info/METADATA,sha256=6T-XZZrKHhEarNrs1oS3L3sZVLjLd3hhFF2HSqpZNWs,23864
|
|
14
|
+
gymcts-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
gymcts-1.4.0.dist-info/top_level.txt,sha256=E8MoLsPimUPD0H1Y6lum4TVe-lhSDAyBAXGrkYIT52w,7
|
|
16
|
+
gymcts-1.4.0.dist-info/RECORD,,
|
gymcts-1.2.1.dist-info/RECORD
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
gymcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
gymcts/colorful_console_utils.py,sha256=n7nymC8kKZnA_8nXcdn201NAzjZjgEHfKpbBcnl4oAE,5891
|
|
3
|
-
gymcts/gymcts_action_history_wrapper.py,sha256=7-p17Fgb80SRCBaCm6G8SJrEPsl2Y4aIO3InviuQP08,6993
|
|
4
|
-
gymcts/gymcts_agent.py,sha256=f2imP-Wv-E7EYE0-iWd86hY9cx-rqHZMlDusp-aE-ps,8698
|
|
5
|
-
gymcts/gymcts_deepcopy_wrapper.py,sha256=lCCT5-6JVCwUCP__4uPMMkT5HnO2JWm2ebzJ69zXp9c,6792
|
|
6
|
-
gymcts/gymcts_distributed_agent.py,sha256=Ha9UBQvFjoErfMWvPyN0JcTYz-JaiJ4eWjLMikp9Yhs,11569
|
|
7
|
-
gymcts/gymcts_env_abc.py,sha256=U1mPz0NWZZL1sdHX7oUP1UFKtmbHwyqHQOQidyh_Uck,2107
|
|
8
|
-
gymcts/gymcts_node.py,sha256=pxjY2Zb0kPuFQ5mWEs0ct3qXoyB47NZK7h2ZGbLJbRA,11052
|
|
9
|
-
gymcts/gymcts_tree_plotter.py,sha256=PR6C7q9Q4kuz1aLGyD7-aZsxk3RqlHZpOqmOiRpCyK0,3547
|
|
10
|
-
gymcts/logger.py,sha256=RI7B9cvbBGrj0_QIAI77wihzuu2tPG_-z9GM2Mw5aHE,926
|
|
11
|
-
gymcts-1.2.1.dist-info/licenses/LICENSE,sha256=UGe75WojDiw_77SEnK2aysEDlElRlkWie7U7NaAFx00,1072
|
|
12
|
-
gymcts-1.2.1.dist-info/METADATA,sha256=wUJEcWrAvdC42kl59qewCN5tK3DKMLxGWcCipnOX4pQ,23371
|
|
13
|
-
gymcts-1.2.1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
14
|
-
gymcts-1.2.1.dist-info/top_level.txt,sha256=E8MoLsPimUPD0H1Y6lum4TVe-lhSDAyBAXGrkYIT52w,7
|
|
15
|
-
gymcts-1.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|