gymcts 1.2.0.tar.gz → 1.3.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gymcts-1.2.0/src/gymcts.egg-info → gymcts-1.3.0}/PKG-INFO +39 -39
- {gymcts-1.2.0 → gymcts-1.3.0}/README.md +34 -37
- {gymcts-1.2.0 → gymcts-1.3.0}/pyproject.toml +5 -2
- {gymcts-1.2.0 → gymcts-1.3.0}/setup.cfg +0 -3
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/colorful_console_utils.py +22 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_action_history_wrapper.py +72 -2
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_agent.py +54 -7
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_deepcopy_wrapper.py +59 -2
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_distributed_agent.py +30 -12
- gymcts-1.3.0/src/gymcts/gymcts_env_abc.py +71 -0
- gymcts-1.3.0/src/gymcts/gymcts_neural_agent.py +479 -0
- gymcts-1.3.0/src/gymcts/gymcts_node.py +343 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_tree_plotter.py +22 -1
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/logger.py +1 -4
- {gymcts-1.2.0 → gymcts-1.3.0/src/gymcts.egg-info}/PKG-INFO +39 -39
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/SOURCES.txt +1 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/requires.txt +4 -2
- gymcts-1.2.0/src/gymcts/gymcts_env_abc.py +0 -28
- gymcts-1.2.0/src/gymcts/gymcts_node.py +0 -199
- {gymcts-1.2.0 → gymcts-1.3.0}/LICENSE +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/MANIFEST.in +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/setup.py +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/__init__.py +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/dependency_links.txt +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/not-zip-safe +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/top_level.txt +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/tests/test_graph_matrix_jsp_env.py +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/tests/test_gymnasium_envs.py +0 -0
- {gymcts-1.2.0 → gymcts-1.3.0}/tests/test_number_of_visits.py +0 -0
{gymcts-1.2.0/src/gymcts.egg-info → gymcts-1.3.0}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gymcts
-Version: 1.2.0
+Version: 1.3.0
 Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
 Author: Alexander Nasuta
 Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>
````

````diff
@@ -47,7 +47,7 @@ Requires-Dist: graph-matrix-jsp-env; extra == "examples"
 Requires-Dist: graph-jsp-env; extra == "examples"
 Provides-Extra: dev
 Requires-Dist: jsp-instance-utils; extra == "dev"
-Requires-Dist: graph-matrix-jsp-env; extra == "dev"
+Requires-Dist: graph-matrix-jsp-env>=0.3.0; extra == "dev"
 Requires-Dist: graph-jsp-env; extra == "dev"
 Requires-Dist: JSSEnv; extra == "dev"
 Requires-Dist: pip-tools; extra == "dev"
````

````diff
@@ -59,21 +59,31 @@ Requires-Dist: stable_baselines3; extra == "dev"
 Requires-Dist: sphinx; extra == "dev"
 Requires-Dist: myst-parser; extra == "dev"
 Requires-Dist: sphinx-autobuild; extra == "dev"
+Requires-Dist: sphinx-copybutton; extra == "dev"
 Requires-Dist: furo; extra == "dev"
 Requires-Dist: twine; extra == "dev"
 Requires-Dist: sphinx-copybutton; extra == "dev"
 Requires-Dist: nbsphinx; extra == "dev"
+Requires-Dist: pandoc; extra == "dev"
 Requires-Dist: jupytext; extra == "dev"
 Requires-Dist: jupyter; extra == "dev"
+Requires-Dist: typing_extensions>=4.12.0; extra == "dev"
 Dynamic: license-file
 
-
+[](https://doi.org/10.5281/zenodo.15283390)
+[](https://www.python.org/downloads/)
+[](https://pypi.org/project/gymcts/)
+[](https://github.com/Alexander-Nasuta/gymcts/blob/master/LICENSE)
+[](https://gymcts.readthedocs.io/en/latest/?badge=latest)
+
+# GYMCTS
 
 A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.
 
-- Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/
--
--
+- Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
+- GitLab: [GYMCTS on GitLab](https://git-ce.rwth-aachen.de/alexander.nasuta/gymcts)
+- Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
+- Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)
 
 ## Description
 
````
````diff
@@ -101,22 +111,26 @@ The usage of a MCTS agent can roughly organised into the following steps:
 - Render the solution
 
 The GYMCTS package provides a two types of wrappers for Gymnasium-style environments:
-- `
-- `
+- `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
+- `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.
 
-These wrappers can be used with the `
-The wrapper implement methods that are required by the `
+These wrappers can be used with the `GymctsAgent` to solve the environment.
+The wrapper implement methods that are required by the `GymctsAgent` to interact with the environment.
 GYMCTS is designed to use a single environment instance and reconstructing the environment state form a state snapshot, when needed.
 
 NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
 Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
+```python
+env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
+```
 
-
-env = TransformReward(env, lambda r: r /
-
+```python
+env = TransformReward(env, lambda r: r / n_steps_per_episode)
+```
+### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)
 
 A minimal example of how to use the package with the FrozenLake environment and the NaiveSoloMCTSGymEnvWrapper is provided in the following code snippet below.
-The
+The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
 
 ```python
 import gymnasium as gym
````
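For readers skimming the diff: the two snippets added in the hunk above use Gymnasium's stock reward wrappers. A minimal, self-contained sketch of the scaling idea follows; the 100-step budget is an illustrative placeholder, not a value taken from this diff:

```python
import gymnasium as gym
from gymnasium.wrappers import TransformReward

env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=True)

# keep a full episode's return roughly inside [-1, 1], as the README note above
# recommends for MCTS; pick the divisor to match your environment's step budget
n_steps_per_episode = 100  # illustrative placeholder
env = TransformReward(env, lambda r: r / n_steps_per_episode)
```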
````diff
@@ -135,7 +149,7 @@ if __name__ == '__main__':
     env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
     env.reset()
 
-    # 1. wrap the environment with the
+    # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
     env = DeepCopyMCTSGymEnvWrapper(env)
 
     # 2. create the agent
````

````diff
@@ -158,7 +172,7 @@ if __name__ == '__main__':
 
     # 5. print the solution
     # read the solution from the info provided by the RecordEpisodeStatistics wrapper
-    # (that
+    # (that DeepCopyMCTSGymEnvWrapper uses internally)
     episode_length = info["episode"]["l"]
     episode_return = info["episode"]["r"]
 
````

````diff
@@ -251,7 +265,7 @@ if __name__ == '__main__':
     env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
     env.reset()
 
-    # 1. wrap the environment with the
+    # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
     env = DeepCopyMCTSGymEnvWrapper(env)
 
     # 2. create the agent
````

````diff
@@ -280,7 +294,7 @@ if __name__ == '__main__':
     env.close()
 
     # 5. print the solution
-    # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that
+    # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
     episode_length = info["episode"]["l"]
     episode_return = info["episode"]["r"]
 
````

````diff
@@ -321,13 +335,13 @@ import gymnasium as gym
 from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
 from jsp_instance_utils.instances import ft06, ft06_makespan
 
-from gymcts.gymcts_agent import
-from gymcts.
+from gymcts.gymcts_agent import GymctsAgent
+from gymcts.gymcts_env_abc import GymctsABC
 
 from gymcts.logger import log
 
 
-class GraphJspGYMCTSWrapper(
+class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):
 
     def __init__(self, env: DisjunctiveGraphJspEnv):
         gym.Wrapper.__init__(self, env)
````

````diff
@@ -378,7 +392,7 @@ if __name__ == '__main__':
 
     env = GraphJspGYMCTSWrapper(env)
 
-    agent =
+    agent = GymctsAgent(
         env=env,
         clear_mcts_tree_after_step=True,
         render_tree_after_step=True,
````

````diff
@@ -421,7 +435,6 @@ import gymnasium as gym
 
 from gymcts.gymcts_agent import GymctsAgent
 from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
-from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
 
 from gymcts.logger import log
 
````

````diff
@@ -434,7 +447,7 @@ if __name__ == '__main__':
     env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
     env.reset()
 
-    # wrap the environment with the
+    # wrap the environment with the wrapper or a custom gymcts wrapper
     env = ActionHistoryMCTSGymEnvWrapper(env)
 
     # create the agent
````

````diff
@@ -505,11 +518,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,
 
 using https:
 ```shell
-git clone https://github.com/Alexander-Nasuta/
+git clone https://github.com/Alexander-Nasuta/gymcts.git
 ```
 or by using the GitHub CLI:
 ```shell
-gh repo clone Alexander-Nasuta/
+gh repo clone Alexander-Nasuta/gymcts
 ```
 
 if you are using PyCharm, I recommend doing the following additional steps:
````

````diff
@@ -518,9 +531,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
 - mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
 - mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)
 
-at the end your project structure should look like this:
-
-todo
 
 ### Create a Virtual Environment (optional)
 
````

````diff
@@ -576,9 +586,6 @@ This project uses `pytest` for testing. To run the tests, run the following comm
 ```shell
 pytest
 ```
-Here is a screenshot of what the output might look like:
-
-
 
 For testing with `tox` run the following command:
 
````

````diff
@@ -586,12 +593,6 @@ For testing with `tox` run the following command:
 tox
 ```
 
-Here is a screenshot of what the output might look like:
-
-
-
-Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
-
 ### Builing and Publishing the Project to PyPi
 
 In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
````

````diff
@@ -630,7 +631,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
 This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).
 
 
-
 ## Contact
 
 If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on repository.
````
{gymcts-1.2.0 → gymcts-1.3.0}/README.md

````diff
@@ -1,10 +1,17 @@
-
+[](https://doi.org/10.5281/zenodo.15283390)
+[](https://www.python.org/downloads/)
+[](https://pypi.org/project/gymcts/)
+[](https://github.com/Alexander-Nasuta/gymcts/blob/master/LICENSE)
+[](https://gymcts.readthedocs.io/en/latest/?badge=latest)
+
 # GYMCTS
 
 A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.
 
-- Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/
--
--
+- Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
+- GitLab: [GYMCTS on GitLab](https://git-ce.rwth-aachen.de/alexander.nasuta/gymcts)
+- Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
+- Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)
 
 ## Description
 
````

````diff
@@ -32,22 +39,26 @@ The usage of a MCTS agent can roughly organised into the following steps:
 - Render the solution
 
 The GYMCTS package provides a two types of wrappers for Gymnasium-style environments:
-- `
-- `
+- `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
+- `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.
 
-These wrappers can be used with the `
-The wrapper implement methods that are required by the `
+These wrappers can be used with the `GymctsAgent` to solve the environment.
+The wrapper implement methods that are required by the `GymctsAgent` to interact with the environment.
 GYMCTS is designed to use a single environment instance and reconstructing the environment state form a state snapshot, when needed.
 
 NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
 Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
+```python
+env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
+```
 
-
-env = TransformReward(env, lambda r: r /
-
+```python
+env = TransformReward(env, lambda r: r / n_steps_per_episode)
+```
+### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)
 
 A minimal example of how to use the package with the FrozenLake environment and the NaiveSoloMCTSGymEnvWrapper is provided in the following code snippet below.
-The
+The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
 
 ```python
 import gymnasium as gym
````
````diff
@@ -66,7 +77,7 @@ if __name__ == '__main__':
     env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
     env.reset()
 
-    # 1. wrap the environment with the
+    # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
     env = DeepCopyMCTSGymEnvWrapper(env)
 
     # 2. create the agent
````

````diff
@@ -89,7 +100,7 @@ if __name__ == '__main__':
 
     # 5. print the solution
     # read the solution from the info provided by the RecordEpisodeStatistics wrapper
-    # (that
+    # (that DeepCopyMCTSGymEnvWrapper uses internally)
     episode_length = info["episode"]["l"]
     episode_return = info["episode"]["r"]
 
````

````diff
@@ -182,7 +193,7 @@ if __name__ == '__main__':
     env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
     env.reset()
 
-    # 1. wrap the environment with the
+    # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
     env = DeepCopyMCTSGymEnvWrapper(env)
 
     # 2. create the agent
````

````diff
@@ -211,7 +222,7 @@ if __name__ == '__main__':
     env.close()
 
     # 5. print the solution
-    # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that
+    # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
     episode_length = info["episode"]["l"]
     episode_return = info["episode"]["r"]
 
````

````diff
@@ -252,13 +263,13 @@ import gymnasium as gym
 from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
 from jsp_instance_utils.instances import ft06, ft06_makespan
 
-from gymcts.gymcts_agent import
-from gymcts.
+from gymcts.gymcts_agent import GymctsAgent
+from gymcts.gymcts_env_abc import GymctsABC
 
 from gymcts.logger import log
 
 
-class GraphJspGYMCTSWrapper(
+class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):
 
     def __init__(self, env: DisjunctiveGraphJspEnv):
         gym.Wrapper.__init__(self, env)
````

````diff
@@ -309,7 +320,7 @@ if __name__ == '__main__':
 
     env = GraphJspGYMCTSWrapper(env)
 
-    agent =
+    agent = GymctsAgent(
         env=env,
         clear_mcts_tree_after_step=True,
         render_tree_after_step=True,
````

````diff
@@ -352,7 +363,6 @@ import gymnasium as gym
 
 from gymcts.gymcts_agent import GymctsAgent
 from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
-from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
 
 from gymcts.logger import log
 
````

````diff
@@ -365,7 +375,7 @@ if __name__ == '__main__':
     env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
     env.reset()
 
-    # wrap the environment with the
+    # wrap the environment with the wrapper or a custom gymcts wrapper
     env = ActionHistoryMCTSGymEnvWrapper(env)
 
     # create the agent
````

````diff
@@ -436,11 +446,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,
 
 using https:
 ```shell
-git clone https://github.com/Alexander-Nasuta/
+git clone https://github.com/Alexander-Nasuta/gymcts.git
 ```
 or by using the GitHub CLI:
 ```shell
-gh repo clone Alexander-Nasuta/
+gh repo clone Alexander-Nasuta/gymcts
 ```
 
 if you are using PyCharm, I recommend doing the following additional steps:
````

````diff
@@ -449,9 +459,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
 - mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
 - mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)
 
-at the end your project structure should look like this:
-
-todo
 
 ### Create a Virtual Environment (optional)
 
````

````diff
@@ -507,9 +514,6 @@ This project uses `pytest` for testing. To run the tests, run the following comm
 ```shell
 pytest
 ```
-Here is a screenshot of what the output might look like:
-
-
 
 For testing with `tox` run the following command:
 
````

````diff
@@ -517,12 +521,6 @@ For testing with `tox` run the following command:
 tox
 ```
 
-Here is a screenshot of what the output might look like:
-
-
-
-Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
-
 ### Builing and Publishing the Project to PyPi
 
 In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
````

````diff
@@ -561,7 +559,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
 This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).
 
 
-
 ## Contact
 
 If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on repository.
````
{gymcts-1.2.0 → gymcts-1.3.0}/pyproject.toml

````diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "gymcts"
-version = "1.2.0"
+version = "1.3.0"
 description = "A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments."
 readme = "README.md"
 authors = [{ name = "Alexander Nasuta", email = "alexander.nasuta@wzl-iqs.rwth-aachen.de" }]
````

````diff
@@ -32,7 +32,7 @@ examples = [
 ]
 dev = [
     "jsp-instance-utils",
-    "graph-matrix-jsp-env",
+    "graph-matrix-jsp-env>=0.3.0",
     "graph-jsp-env",
     "JSSEnv",
 
````

````diff
@@ -49,13 +49,16 @@ dev = [
     "myst-parser", # .md support for sphinx
     "sphinx-autobuild",
     #
+    "sphinx-copybutton", # for code copy buttons
     "furo", # cool theme
     "twine",
     "sphinx-copybutton", # for code copy buttons
     "nbsphinx", # for jupyter notebook support in sphinx
+    "pandoc",
 
     "jupytext", # converting .py examples to jupyter notebook jupytext --to notebook *.py
     "jupyter", # for jupyter notebook kernel
+    "typing_extensions>=4.12.0",
 ]
 
 [project.urls]
````
{gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/colorful_console_utils.py

````diff
@@ -106,6 +106,18 @@ def wrap_with_color_codes(s: object, /, r: int | float, g: int | float, b: int |
 
 
 def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rainbow") -> str:
+    """
+    Wraps a string with a color scale (a matplotlib c_map) based on the n_of_item and n_classes.
+    This function is used to color code the available actions in the MCTS tree visualisation.
+    The children of the MCTS tree are colored based on their action for a clearer visualisation.
+
+    :param s: the string (or object) to be wrapped. objects are converted to string (using the __str__ function).
+    :param n_of_item: the index of the item to be colored. In a mcts tree, this is the (parent-)action of the node.
+    :param n_classes: the number of classes (or items) to be colored. In a mcts tree, this is the number of available actions.
+    :param c_map: the colormap to be used (default is 'rainbow').
+        The colormap can be any matplotlib colormap, e.g. 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
+    :return: a string that contains the color-codes (prefix and suffix) and the string s in between.
+    """
     if s is None or n_of_item is None or n_classes is None:
         return s
 
````
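As background for the docstring added above: sampling a matplotlib colormap at evenly spaced positions and emitting 24-bit ANSI color codes generally looks like the sketch below. This is an illustration of the technique, not the package's actual implementation:

```python
import matplotlib

def evenly_spaced_ansi(s: str, n_of_item: int, n_classes: int, c_map: str = "rainbow") -> str:
    # sample the colormap at an evenly spaced position in [0, 1]
    cmap = matplotlib.colormaps[c_map]
    r, g, b, _alpha = cmap(n_of_item / max(n_classes - 1, 1))
    # wrap the string in a 24-bit ANSI foreground color code and reset afterwards
    return f"\x1b[38;2;{int(r * 255)};{int(g * 255)};{int(b * 255)}m{s}\x1b[0m"

print(evenly_spaced_ansi("action 2", n_of_item=2, n_classes=4))
```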
````diff
@@ -119,6 +131,16 @@ def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rain
 
 
 def wrap_with_color_scale(s: str, value: float, min_val: float, max_val: float, c_map=None) -> str:
+    """
+    Wraps a string with a color scale (a matplotlib c_map) based on the value, min_val, and max_val.
+
+    :param s: the string to be wrapped
+    :param value: the value to be mapped to a color
+    :param min_val: the minimum value of the scale
+    :param max_val: the maximum value of the scale
+    :param c_map: the colormap to be used (default is 'rainbow')
+    :return:
+    """
     if s is None or min_val is None or max_val is None or min_val >= max_val:
         return s
 
````
{gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_action_history_wrapper.py

````diff
@@ -1,8 +1,7 @@
 import random
-import copy
 
 import numpy as np
-from typing import
+from typing import Any, SupportsFloat, Callable
 import gymnasium as gym
 from gymnasium.core import WrapperActType, WrapperObsType
 from gymnasium.wrappers import RecordEpisodeStatistics
````
````diff
@@ -13,6 +12,21 @@ from gymcts.logger import log
 
 
 class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
+    """
+    A wrapper for gym environments that implements the GymctsABC interface.
+    It uses the action history as state representation.
+    Please note that this is not the most efficient way to implement the state representation.
+    It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+    If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+    The action history is a list of actions taken in the environment.
+    The state is represented as a list of actions taken in the environment.
+    The state is used to restore the environment using the load_state method.
+
+    It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+    If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+    """
+
+    # helper attributes for the wrapper
     _terminal_flag: bool = False
     _last_reward: SupportsFloat = 0
     _step_tuple: tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]] = None
````
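The docstring added here mirrors the README: the wrapper is a drop-in way to try MCTS before writing a custom `GymctsABC` implementation. A minimal usage sketch, assuming only the constructor arguments shown elsewhere in this diff (the surrounding solve loop is omitted):

```python
import gymnasium as gym

from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper

# the wrapper replays the recorded action history to restore states,
# so it is only suitable for deterministic environments
env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
env.reset()

env = ActionHistoryMCTSGymEnvWrapper(env)
agent = GymctsAgent(env=env, clear_mcts_tree_after_step=True, render_tree_after_step=True)
```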
````diff
@@ -25,6 +39,17 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
         action_mask_fn: str | Callable[[gym.Env], np.ndarray] | None = None,
         buffer_length: int = 100,
     ):
+        """
+        A wrapper for gym environments that implements the GymctsABC interface.
+        It uses the action history as state representation.
+        Please note that this is not the most efficient way to implement the state representation.
+        It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+        If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+
+        :param env: the environment to wrap
+        :param action_mask_fn: a function that takes the environment as input and returns a mask of valid actions
+        :param buffer_length: the length of the buffer for recording episodes for determining their rollout returns
+        """
         # wrap with RecordEpisodeStatistics if it is not already wrapped
         env = RecordEpisodeStatistics(env, buffer_length=buffer_length)
 
````
````diff
@@ -48,6 +73,17 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
         self._action_mask_fn = action_mask_fn
 
     def load_state(self, state: list[int]) -> None:
+        """
+        Loads the state of the environment. The state is a list of actions taken in the environment.
+
+        The environment is reset and all actions in the state are performed in order to restore the environment to the
+        same state.
+
+        This works only for deterministic environments!
+
+        :param state: the state to load
+        :return: None
+        """
         self.env.reset()
         self._wrapper_action_history = []
 
````
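Conceptually, the restore mechanism described in this docstring boils down to a reset-and-replay loop. A hypothetical standalone sketch (helper name invented for illustration):

```python
def replay_actions(env, state: list[int]) -> None:
    # restore a deterministic environment by replaying the recorded actions
    env.reset()
    for action in state:
        env.step(action)  # identical transitions reproduce the original state
```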
````diff
@@ -56,15 +92,30 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
             self._wrapper_action_history.append(action)
 
     def is_terminal(self) -> bool:
+        """
+        Returns True if the environment is in a terminal state, False otherwise.
+
+        :return:
+        """
         if not len(self.get_valid_actions()):
             return True
         else:
             return self._terminal_flag
 
     def action_masks(self) -> np.ndarray | None:
+        """
+        Returns the action masks for the environment. If the action_mask_fn is not set, it returns None.
+
+        :return:
+        """
         return self._action_mask_fn(self.env) if self._action_mask_fn is not None else None
 
     def get_valid_actions(self) -> list[int]:
+        """
+        Returns a list of valid actions for the current state of the environment.
+
+        :return: a list of valid actions
+        """
         if self._action_mask_fn is None:
             action_space: gym.spaces.Discrete = self.env.action_space  # Type hinting
             return list(range(action_space.n))
````
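For context on `action_mask_fn`: it is the hook that narrows `get_valid_actions` down from the full discrete action space. A hypothetical mask function (names and the masking rule invented for illustration):

```python
import gymnasium as gym
import numpy as np

def example_mask_fn(env: gym.Env) -> np.ndarray:
    # hypothetical rule: every action is valid except action 0
    mask = np.ones(env.action_space.n, dtype=bool)
    mask[0] = False
    return mask

env = ActionHistoryMCTSGymEnvWrapper(gym.make("FrozenLake-v1"), action_mask_fn=example_mask_fn)
print(env.get_valid_actions())  # -> [1, 2, 3]
```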
````diff
@@ -72,6 +123,12 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
             return [i for i, mask in enumerate(self.action_masks()) if mask]
 
     def rollout(self) -> float:
+        """
+        Performs a random rollout from the current state of the environment and returns the return (sum of rewards)
+        of the rollout.
+
+        :return: the return of the rollout
+        """
         log.debug("performing rollout")
         # random rollout
         # perform random valid action util terminal
````
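The rollout described here is the standard MCTS default policy: take random valid actions until the episode ends and sum up the rewards. A generic sketch of that loop (not the package's exact code):

```python
import random

def random_rollout(env) -> float:
    # play random valid actions until the episode terminates, accumulating the return
    episode_return = 0.0
    while not env.is_terminal():
        action = random.choice(env.get_valid_actions())
        _obs, reward, terminated, truncated, _info = env.step(action)
        episode_return += float(reward)
        if terminated or truncated:
            break
    return episode_return
```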
````diff
@@ -92,11 +149,24 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
         return episode_return
 
     def get_state(self) -> list[int]:
+        """
+        Returns the current state of the environment. The state is a list of actions taken in the environment,
+        namely all action that have been taken in the environment so far (since the last reset).
+
+        :return: a list of actions taken in the environment
+        """
+
         return self._wrapper_action_history.copy()
 
     def step(
         self, action: WrapperActType
    ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
+        """
+        Performs a step in the environment. It adds the action to the action history and updates the terminal flag.
+
+        :param action: action to perform in the environment
+        :return: the step tuple of the environment (obs, reward, terminated, truncated, info)
+        """
         step_tuple = self.env.step(action)
         self._wrapper_action_history.append(action)
         obs, reward, terminated, truncated, info = step_tuple
````