gymcts 1.4.1__tar.gz → 1.4.3__tar.gz
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {gymcts-1.4.1/src/gymcts.egg-info → gymcts-1.4.3}/PKG-INFO +1 -1
- {gymcts-1.4.1 → gymcts-1.4.3}/pyproject.toml +1 -1
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_neural_agent.py +44 -61
- {gymcts-1.4.1 → gymcts-1.4.3/src/gymcts.egg-info}/PKG-INFO +1 -1
- {gymcts-1.4.1 → gymcts-1.4.3}/LICENSE +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/MANIFEST.in +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/README.md +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/setup.cfg +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/setup.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/__init__.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/colorful_console_utils.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_action_history_wrapper.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_agent.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_deepcopy_wrapper.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_distributed_agent.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_env_abc.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_node.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_tree_plotter.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/logger.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/SOURCES.txt +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/dependency_links.txt +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/not-zip-safe +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/requires.txt +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/top_level.txt +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/tests/test_graph_matrix_jsp_env.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/tests/test_gymnasium_envs.py +0 -0
- {gymcts-1.4.1 → gymcts-1.4.3}/tests/test_number_of_visits.py +0 -0
{gymcts-1.4.1/src/gymcts.egg-info → gymcts-1.4.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gymcts
-Version: 1.4.1
+Version: 1.4.3
 Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
 Author: Alexander Nasuta
 Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>
{gymcts-1.4.1 → gymcts-1.4.3}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "gymcts"
-version = "1.4.1"
+version = "1.4.3"
 description = "A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments."
 readme = "README.md"
 authors = [{ name = "Alexander Nasuta", email = "alexander.nasuta@wzl-iqs.rwth-aachen.de" }]
{gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_neural_agent.py

@@ -55,14 +55,11 @@ class GraphJspNeuralGYMCTSWrapper(GymctsABC, gym.Wrapper):
     def get_state(self) -> Any:
         return env.unwrapped.get_action_history()
 
-
     def action_masks(self) -> np.ndarray | None:
         """Return the action mask for the current state."""
         return self.env.unwrapped.valid_action_mask()
 
 
-
-
 class GymctsNeuralNode(GymctsNode):
     PUCT_v3_mu = 0.95
 
@@ -126,15 +123,15 @@ class GymctsNeuralNode(GymctsNode):
     score_variate: Literal[
         "PUCT_v0",
         "PUCT_v1",
-        "
-        "
-        "
-        "
-        "
-        "
-        "
-        "
-        "
+        "PUCT_v2",
+        "PUCT_v3",
+        "PUCT_v4",
+        "PUCT_v5",
+        "PUCT_v6",
+        "PUCT_v7",
+        "PUCT_v8",
+        "PUCT_v9",
+        "PUCT_v10",
         "MuZero_v0",
         "MuZero_v1",
     ] = "PUCT_v0"
@@ -146,13 +143,12 @@ class GymctsNeuralNode(GymctsNode):
                  env_reference: GymctsABC,
                  prior_selection_score: float,
                  observation: np.ndarray | None = None,
-
+                 ):
         super().__init__(action, parent, env_reference)
 
         self._obs = observation
         self._selection_score_prior = prior_selection_score
 
-
     def tree_policy_score(self) -> float:
         # call the superclass (GymctsNode) for ucb_score
         c = GymctsNode.ubc_c
@@ -167,7 +163,6 @@ class GymctsNeuralNode(GymctsNode):
         b = GymctsNode.best_action_weight
         exploitation_term = 0.0 if self.visit_count == 0 else (1 - b) * self.mean_value + b * self.max_value
 
-
         if GymctsNeuralNode.score_variate == "PUCT_v0":
             return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
         elif GymctsNeuralNode.score_variate == "PUCT_v1":
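For context: the `PUCT_v0` branch above is the AlphaZero-style PUCT rule, an exploitation term plus a prior-weighted exploration bonus c · P(s,a) · √N(s) / (1 + N(s,a)). A minimal stand-alone sketch of just that formula (the function and parameter names here are illustrative, not part of the gymcts API):

```python
import math

def puct_v0(exploitation_term: float, p_sa: float, n_s: int, n_sa: int, c: float) -> float:
    """AlphaZero-style PUCT score, mirroring the PUCT_v0 branch above.

    exploitation_term: blend of mean and max value, 0.0 for unvisited nodes
    p_sa: prior probability of the action under the policy network
    n_s / n_sa: visit counts of the parent node and of this child
    c: exploration constant
    """
    return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
```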
@@ -200,11 +195,10 @@ class GymctsNeuralNode(GymctsNode):
             c2 = GymctsNeuralNode.MuZero_c2
             return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))
 
-
-
+        exploration_term = self._selection_score_prior * c * math.sqrt(
+            math.log(self.parent.visit_count) / (self.visit_count)) if self.visit_count > 0 else float("inf")
         return self.mean_value + exploration_term
 
-
     def get_best_action(self) -> int:
         """
         Returns the best action of the node. The best action is the action with the highest score.
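The two added lines in the hunk above are the fallback for score variates that fall through every branch: a prior-weighted UCB1 term, with unvisited children scoring `float("inf")` so each child is tried at least once. A self-contained sketch of the same logic (illustrative names, not the package's API; returning inf directly is equivalent, since `mean_value + inf` is inf):

```python
import math

def prior_weighted_ucb1(mean_value: float, prior: float,
                        parent_visits: int, visits: int, c: float) -> float:
    """Prior-weighted UCB1, mirroring the added exploration_term above."""
    if visits == 0:
        # unvisited children are always preferred, as in the original
        return float("inf")
    return mean_value + prior * c * math.sqrt(math.log(parent_visits) / visits)
```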
@@ -214,7 +208,6 @@ class GymctsNeuralNode(GymctsNode):
         """
         return max(self.children.values(), key=lambda child: child.max_value).action
 
-
     def __str__(self, colored=False, action_space_n=None) -> str:
         """
         Returns a string representation of the node. The string representation is used for visualisation purposes.
@@ -263,14 +256,13 @@ class GymctsNeuralNode(GymctsNode):
         root_node = self.get_root()
         mean_val = f"{self.mean_value:.2f}"
 
-
         return ((f"("
                  f"{p}a{e}={ccu.wrap_evenly_spaced_color(s=self.action, n_of_item=self.action, n_classes=action_space_n)}, "
                  f"{p}N{e}={colorful_value(self.visit_count)}, "
                  f"{p}Q_v{e}={ccu.wrap_with_color_scale(s=mean_val, value=self.mean_value, min_val=root_node.min_value, max_val=root_node.max_value)}, "
                  f"{p}best{e}={colorful_value(self.max_value)}") +
-                (
-
+                (
+                    f", {p}{GymctsNeuralNode.score_variate}{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))
 
 
 class GymctsNeuralAgent(GymctsAgent):
@@ -282,15 +274,15 @@ class GymctsNeuralAgent(GymctsAgent):
             score_variate: Literal[
                 "PUCT_v0",
                 "PUCT_v1",
-                "
-                "
-                "
-                "
-                "
-                "
-                "
-                "
-                "
+                "PUCT_v2",
+                "PUCT_v3",
+                "PUCT_v4",
+                "PUCT_v5",
+                "PUCT_v6",
+                "PUCT_v7",
+                "PUCT_v8",
+                "PUCT_v9",
+                "PUCT_v10",
                 "MuZero_v0",
                 "MuZero_v1",
             ] = "PUCT_v0",
@@ -304,15 +296,23 @@ class GymctsNeuralAgent(GymctsAgent):
            **kwargs
        )
        if score_variate not in [
-            "PUCT_v0",
-            "
-            "
-            "
-            "
+            "PUCT_v0",
+            "PUCT_v1",
+            "PUCT_v2",
+            "PUCT_v3",
+            "PUCT_v4",
+            "PUCT_v5",
+            "PUCT_v6",
+            "PUCT_v7",
+            "PUCT_v8",
+            "PUCT_v9",
+            "PUCT_v10",
+            "MuZero_v0",
+            "MuZero_v1",
        ]:
            raise ValueError(f"Invalid score_variate: {score_variate}. Must be one of: "
-                             f"PUCT_v0, PUCT_v1, 
-                             f"
+                             f"['PUCT_v0', 'PUCT_v1', 'PUCT_v2', 'PUCT_v3', 'PUCT_v4', 'PUCT_v5', "
+                             f"'PUCT_v6', 'PUCT_v7', 'PUCT_v8', 'PUCT_v9', 'PUCT_v10', 'MuZero_v0', 'MuZero_v1']")
        GymctsNeuralNode.score_variate = score_variate
 
        if model_kwargs is None:
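The membership check above repeats the variants of the `Literal` annotation by hand, which is why this hunk and the two signature hunks all had to change together. A hedged alternative (a sketch of one way to do it, not what gymcts does) is to declare the variants once and derive the runtime list with `typing.get_args`:

```python
from typing import Literal, get_args

# Hypothetical single source of truth for the variants (gymcts spells them out inline).
ScoreVariate = Literal[
    "PUCT_v0", "PUCT_v1", "PUCT_v2", "PUCT_v3", "PUCT_v4", "PUCT_v5",
    "PUCT_v6", "PUCT_v7", "PUCT_v8", "PUCT_v9", "PUCT_v10",
    "MuZero_v0", "MuZero_v1",
]
VALID_SCORE_VARIATES = get_args(ScoreVariate)  # tuple of the literal strings

def validate_score_variate(score_variate: str) -> None:
    if score_variate not in VALID_SCORE_VARIATES:
        raise ValueError(f"Invalid score_variate: {score_variate}. "
                         f"Must be one of: {list(VALID_SCORE_VARIATES)}")
```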
@@ -336,22 +336,17 @@ class GymctsNeuralAgent(GymctsAgent):
         env = ActionMasker(env, action_mask_fn=mask_fn)
 
         model_kwargs = {
-
-
-
-
+            "policy": MaskableActorCriticPolicy,
+            "env": env,
+            "verbose": 1,
+        } | model_kwargs
 
         self._model = sb3_contrib.MaskablePPO(**model_kwargs)
 
-
-
-
-
-    def learn(self, total_timesteps:int, **kwargs) -> None:
+    def learn(self, total_timesteps: int, **kwargs) -> None:
         """Learn from the environment using the MaskablePPO model."""
         self._model.learn(total_timesteps=total_timesteps, **kwargs)
 
-
     def expand_node(self, node: GymctsNeuralNode) -> None:
         log.debug(f"expanding node: {node}")
         # EXPANSION STRATEGY
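The `{...} | model_kwargs` construction above relies on the PEP 584 dict-union operator (Python 3.9+): when both operands share a key, the right-hand value wins, so caller-supplied `model_kwargs` override the defaults before everything is forwarded to `sb3_contrib.MaskablePPO`. A quick demonstration with placeholder values:

```python
# Right-hand operand wins on key collisions, exactly as in the hunk above.
defaults = {"policy": "MaskableActorCriticPolicy", "env": "some-env", "verbose": 1}
user_kwargs = {"verbose": 0, "gamma": 0.99}

merged = defaults | user_kwargs
assert merged == {
    "policy": "MaskableActorCriticPolicy",
    "env": "some-env",
    "verbose": 0,   # user override took precedence
    "gamma": 0.99,  # extra user key is kept
}
```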
@@ -395,7 +390,6 @@ class GymctsNeuralAgent(GymctsAgent):
             if prob == 0.0:
                 continue
 
-
             assert action in node.valid_actions, f"Action {action} is not in valid actions: {node.valid_actions}"
 
             obs, reward, terminal, truncated, _ = self.env.step(action)
@@ -411,9 +405,6 @@ class GymctsNeuralAgent(GymctsAgent):
         # print(f"Expanded node {node} with {len(node.children)} children.")
 
 
-
-
-
 if __name__ == '__main__':
     log.setLevel(20)
 
@@ -426,14 +417,13 @@ if __name__ == '__main__':
         "reward_function": "nasuta",
     }
 
-
-
     env = DisjunctiveGraphJspEnv(**env_kwargs)
     env.reset()
 
     env = GraphJspNeuralGYMCTSWrapper(env)
 
     import torch
+
     model_kwargs = {
         "gamma": 0.99013,
         "gae_lambda": 0.9,
@@ -467,7 +457,6 @@ if __name__ == '__main__':
 
     agent.learn(total_timesteps=10_000)
 
-
     agent.solve()
 
     actions = agent.solve(render_tree_after_step=True)
@@ -477,9 +466,3 @@ if __name__ == '__main__':
     env.render()
     makespan = env.unwrapped.get_makespan()
     print(f"makespan: {makespan}")
-
-
-
-
-
-
{gymcts-1.4.1 → gymcts-1.4.3/src/gymcts.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gymcts
-Version: 1.4.1
+Version: 1.4.3
 Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
 Author: Alexander Nasuta
 Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>