gymcts 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {gymcts-1.2.0/src/gymcts.egg-info → gymcts-1.3.0}/PKG-INFO +39 -39
  2. {gymcts-1.2.0 → gymcts-1.3.0}/README.md +34 -37
  3. {gymcts-1.2.0 → gymcts-1.3.0}/pyproject.toml +5 -2
  4. {gymcts-1.2.0 → gymcts-1.3.0}/setup.cfg +0 -3
  5. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/colorful_console_utils.py +22 -0
  6. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_action_history_wrapper.py +72 -2
  7. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_agent.py +54 -7
  8. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_deepcopy_wrapper.py +59 -2
  9. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_distributed_agent.py +30 -12
  10. gymcts-1.3.0/src/gymcts/gymcts_env_abc.py +71 -0
  11. gymcts-1.3.0/src/gymcts/gymcts_neural_agent.py +479 -0
  12. gymcts-1.3.0/src/gymcts/gymcts_node.py +343 -0
  13. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/gymcts_tree_plotter.py +22 -1
  14. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/logger.py +1 -4
  15. {gymcts-1.2.0 → gymcts-1.3.0/src/gymcts.egg-info}/PKG-INFO +39 -39
  16. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/SOURCES.txt +1 -0
  17. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/requires.txt +4 -2
  18. gymcts-1.2.0/src/gymcts/gymcts_env_abc.py +0 -28
  19. gymcts-1.2.0/src/gymcts/gymcts_node.py +0 -199
  20. {gymcts-1.2.0 → gymcts-1.3.0}/LICENSE +0 -0
  21. {gymcts-1.2.0 → gymcts-1.3.0}/MANIFEST.in +0 -0
  22. {gymcts-1.2.0 → gymcts-1.3.0}/setup.py +0 -0
  23. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts/__init__.py +0 -0
  24. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/dependency_links.txt +0 -0
  25. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/not-zip-safe +0 -0
  26. {gymcts-1.2.0 → gymcts-1.3.0}/src/gymcts.egg-info/top_level.txt +0 -0
  27. {gymcts-1.2.0 → gymcts-1.3.0}/tests/test_graph_matrix_jsp_env.py +0 -0
  28. {gymcts-1.2.0 → gymcts-1.3.0}/tests/test_gymnasium_envs.py +0 -0
  29. {gymcts-1.2.0 → gymcts-1.3.0}/tests/test_number_of_visits.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gymcts
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems formulated as gymnasium reinforcement learning environments.
5
5
  Author: Alexander Nasuta
6
6
  Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>
@@ -47,7 +47,7 @@ Requires-Dist: graph-matrix-jsp-env; extra == "examples"
47
47
  Requires-Dist: graph-jsp-env; extra == "examples"
48
48
  Provides-Extra: dev
49
49
  Requires-Dist: jsp-instance-utils; extra == "dev"
50
- Requires-Dist: graph-matrix-jsp-env; extra == "dev"
50
+ Requires-Dist: graph-matrix-jsp-env>=0.3.0; extra == "dev"
51
51
  Requires-Dist: graph-jsp-env; extra == "dev"
52
52
  Requires-Dist: JSSEnv; extra == "dev"
53
53
  Requires-Dist: pip-tools; extra == "dev"
@@ -59,21 +59,31 @@ Requires-Dist: stable_baselines3; extra == "dev"
59
59
  Requires-Dist: sphinx; extra == "dev"
60
60
  Requires-Dist: myst-parser; extra == "dev"
61
61
  Requires-Dist: sphinx-autobuild; extra == "dev"
62
+ Requires-Dist: sphinx-copybutton; extra == "dev"
62
63
  Requires-Dist: furo; extra == "dev"
63
64
  Requires-Dist: twine; extra == "dev"
64
65
  Requires-Dist: sphinx-copybutton; extra == "dev"
65
66
  Requires-Dist: nbsphinx; extra == "dev"
67
+ Requires-Dist: pandoc; extra == "dev"
66
68
  Requires-Dist: jupytext; extra == "dev"
67
69
  Requires-Dist: jupyter; extra == "dev"
70
+ Requires-Dist: typing_extensions>=4.12.0; extra == "dev"
68
71
  Dynamic: license-file
69
72
 
70
- # Graph Matrix Job Shop Env
73
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15283390.svg)](https://doi.org/10.5281/zenodo.15283390)
74
+ [![Python Badge](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff&style=flat)](https://www.python.org/downloads/)
75
+ [![PyPI version](https://img.shields.io/pypi/v/gymcts)](https://pypi.org/project/gymcts/)
76
+ [![License](https://img.shields.io/pypi/l/gymcts)](https://github.com/Alexander-Nasuta/gymcts/blob/master/LICENSE)
77
+ [![Documentation Status](https://readthedocs.org/projects/gymcts/badge/?version=latest)](https://gymcts.readthedocs.io/en/latest/?badge=latest)
78
+
79
+ # GYMCTS
71
80
 
72
81
  A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.
73
82
 
74
- - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv)
75
- - Pypi: [GYMCTS on PyPi](https://pypi.org/project/graph-matrix-jsp-env/)
76
- - Documentation: [GYMCTS Docs](https://graphmatrixjobshopenv.readthedocs.io/en/latest/)
83
+ - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
84
+ - GitLab: [GYMCTS on GitLab](https://git-ce.rwth-aachen.de/alexander.nasuta/gymcts)
85
+ - Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
86
+ - Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)
77
87
 
78
88
  ## Description
79
89
 
@@ -101,22 +111,26 @@ The usage of a MCTS agent can roughly organised into the following steps:
101
111
  - Render the solution
102
112
 
103
113
  The GYMCTS package provides two types of wrappers for Gymnasium-style environments:
104
- - `NaiveSoloMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
105
- - `DeterministicSoloMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.
114
+ - `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
115
+ - `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that led to the current state in the MCTS node.
106
116
 
107
- These wrappers can be used with the `SoloMCTSAgent` to solve the environment.
108
- The wrapper implement methods that are required by the `SoloMCTSAgent` to interact with the environment.
117
+ These wrappers can be used with the `GymctsAgent` to solve the environment.
118
+ The wrappers implement the methods that are required by the `GymctsAgent` to interact with the environment.
109
119
  GYMCTS is designed to use a single environment instance and to reconstruct the environment state from a state snapshot when needed.
110
120
 
111
121
  NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
112
122
  Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
123
+ ```python
124
+ env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
125
+ ```
113
126
 
114
- NormalizeReward(env, gamma=0.99, epsilon=1e-8)
115
- env = TransformReward(env, lambda r: r / 36)
116
- ### FrozenLake Example (NaiveSoloMCTSGymEnvWrapper)
127
+ ```python
128
+ env = TransformReward(env, lambda r: r / n_steps_per_episode)
129
+ ```
130
+ ### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)
117
131
 
118
132
  A minimal example of how to use the package with the FrozenLake environment and the DeepCopyMCTSGymEnvWrapper is provided in the code snippet below.
119
- The NaiveSoloMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
133
+ The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
120
134
 
121
135
  ```python
122
136
  import gymnasium as gym
@@ -135,7 +149,7 @@ if __name__ == '__main__':
135
149
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
136
150
  env.reset()
137
151
 
138
- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
152
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
139
153
  env = DeepCopyMCTSGymEnvWrapper(env)
140
154
 
141
155
  # 2. create the agent
@@ -158,7 +172,7 @@ if __name__ == '__main__':
158
172
 
159
173
  # 5. print the solution
160
174
  # read the solution from the info provided by the RecordEpisodeStatistics wrapper
161
- # (that NaiveSoloMCTSGymEnvWrapper uses internally)
175
+ # (that DeepCopyMCTSGymEnvWrapper uses internally)
162
176
  episode_length = info["episode"]["l"]
163
177
  episode_return = info["episode"]["r"]
164
178
 
@@ -251,7 +265,7 @@ if __name__ == '__main__':
251
265
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
252
266
  env.reset()
253
267
 
254
- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
268
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
255
269
  env = DeepCopyMCTSGymEnvWrapper(env)
256
270
 
257
271
  # 2. create the agent
@@ -280,7 +294,7 @@ if __name__ == '__main__':
280
294
  env.close()
281
295
 
282
296
  # 5. print the solution
283
- # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that NaiveSoloMCTSGymEnvWrapper wraps internally)
297
+ # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
284
298
  episode_length = info["episode"]["l"]
285
299
  episode_return = info["episode"]["r"]
286
300
 
@@ -321,13 +335,13 @@ import gymnasium as gym
321
335
  from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
322
336
  from jsp_instance_utils.instances import ft06, ft06_makespan
323
337
 
324
- from gymcts.gymcts_agent import SoloMCTSAgent
325
- from gymcts.gymcts_gym_env import SoloMCTSGymEnv
338
+ from gymcts.gymcts_agent import GymctsAgent
339
+ from gymcts.gymcts_env_abc import GymctsABC
326
340
 
327
341
  from gymcts.logger import log
328
342
 
329
343
 
330
- class GraphJspGYMCTSWrapper(SoloMCTSGymEnv, gym.Wrapper):
344
+ class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):
331
345
 
332
346
  def __init__(self, env: DisjunctiveGraphJspEnv):
333
347
  gym.Wrapper.__init__(self, env)
@@ -378,7 +392,7 @@ if __name__ == '__main__':
378
392
 
379
393
  env = GraphJspGYMCTSWrapper(env)
380
394
 
381
- agent = SoloMCTSAgent(
395
+ agent = GymctsAgent(
382
396
  env=env,
383
397
  clear_mcts_tree_after_step=True,
384
398
  render_tree_after_step=True,
@@ -421,7 +435,6 @@ import gymnasium as gym
421
435
 
422
436
  from gymcts.gymcts_agent import GymctsAgent
423
437
  from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
424
- from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
425
438
 
426
439
  from gymcts.logger import log
427
440
 
@@ -434,7 +447,7 @@ if __name__ == '__main__':
434
447
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
435
448
  env.reset()
436
449
 
437
- # wrap the environment with the naive wrapper or a custom gymcts wrapper
450
+ # wrap the environment with the wrapper or a custom gymcts wrapper
438
451
  env = ActionHistoryMCTSGymEnvWrapper(env)
439
452
 
440
453
  # create the agent
@@ -505,11 +518,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,
505
518
 
506
519
  using https:
507
520
  ```shell
508
- git clone https://github.com/Alexander-Nasuta/todo
521
+ git clone https://github.com/Alexander-Nasuta/gymcts.git
509
522
  ```
510
523
  or by using the GitHub CLI:
511
524
  ```shell
512
- gh repo clone Alexander-Nasuta/todo
525
+ gh repo clone Alexander-Nasuta/gymcts
513
526
  ```
514
527
 
515
528
  if you are using PyCharm, I recommend doing the following additional steps:
@@ -518,9 +531,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
518
531
  - mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
519
532
  - mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)
520
533
 
521
- at the end your project structure should look like this:
522
-
523
- todo
524
534
 
525
535
  ### Create a Virtual Environment (optional)
526
536
 
@@ -576,9 +586,6 @@ This project uses `pytest` for testing. To run the tests, run the following comm
576
586
  ```shell
577
587
  pytest
578
588
  ```
579
- Here is a screenshot of what the output might look like:
580
-
581
- ![](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv/raw/master/resources/pytest-screenshot.png)
582
589
 
583
590
  For testing with `tox` run the following command:
584
591
 
@@ -586,12 +593,6 @@ For testing with `tox` run the following command:
586
593
  tox
587
594
  ```
588
595
 
589
- Here is a screenshot of what the output might look like:
590
-
591
- ![](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv/raw/master/resources/tox-screenshot.png)
592
-
593
- Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
594
-
595
596
  ### Building and Publishing the Project to PyPi
596
597
 
597
598
  In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
@@ -630,7 +631,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
630
631
  This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).
631
632
 
632
633
 
633
-
634
634
  ## Contact
635
635
 
636
636
  If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on the repository.
@@ -1,10 +1,17 @@
1
- # Graph Matrix Job Shop Env
1
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15283390.svg)](https://doi.org/10.5281/zenodo.15283390)
2
+ [![Python Badge](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff&style=flat)](https://www.python.org/downloads/)
3
+ [![PyPI version](https://img.shields.io/pypi/v/gymcts)](https://pypi.org/project/gymcts/)
4
+ [![License](https://img.shields.io/pypi/l/gymcts)](https://github.com/Alexander-Nasuta/gymcts/blob/master/LICENSE)
5
+ [![Documentation Status](https://readthedocs.org/projects/gymcts/badge/?version=latest)](https://gymcts.readthedocs.io/en/latest/?badge=latest)
6
+
7
+ # GYMCTS
2
8
 
3
9
  A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.
4
10
 
5
- - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv)
6
- - Pypi: [GYMCTS on PyPi](https://pypi.org/project/graph-matrix-jsp-env/)
7
- - Documentation: [GYMCTS Docs](https://graphmatrixjobshopenv.readthedocs.io/en/latest/)
11
+ - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
12
+ - GitLab: [GYMCTS on GitLab](https://git-ce.rwth-aachen.de/alexander.nasuta/gymcts)
13
+ - Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
14
+ - Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)
8
15
 
9
16
  ## Description
10
17
 
@@ -32,22 +39,26 @@ The usage of a MCTS agent can roughly organised into the following steps:
32
39
  - Render the solution
33
40
 
34
41
  The GYMCTS package provides two types of wrappers for Gymnasium-style environments:
35
- - `NaiveSoloMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
36
- - `DeterministicSoloMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.
42
+ - `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
43
+ - `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that led to the current state in the MCTS node.
37
44
 
38
- These wrappers can be used with the `SoloMCTSAgent` to solve the environment.
39
- The wrapper implement methods that are required by the `SoloMCTSAgent` to interact with the environment.
45
+ These wrappers can be used with the `GymctsAgent` to solve the environment.
46
+ The wrappers implement the methods that are required by the `GymctsAgent` to interact with the environment.
40
47
  GYMCTS is designed to use a single environment instance and to reconstruct the environment state from a state snapshot when needed.
41
48
 
42
49
  NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
43
50
  Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
51
+ ```python
52
+ env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
53
+ ```
44
54
 
45
- NormalizeReward(env, gamma=0.99, epsilon=1e-8)
46
- env = TransformReward(env, lambda r: r / 36)
47
- ### FrozenLake Example (NaiveSoloMCTSGymEnvWrapper)
55
+ ```python
56
+ env = TransformReward(env, lambda r: r / n_steps_per_episode)
57
+ ```
58
+ ### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)
48
59
 
49
60
  A minimal example of how to use the package with the FrozenLake environment and the DeepCopyMCTSGymEnvWrapper is provided in the code snippet below.
50
- The NaiveSoloMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
61
+ The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
51
62
 
52
63
  ```python
53
64
  import gymnasium as gym
@@ -66,7 +77,7 @@ if __name__ == '__main__':
66
77
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
67
78
  env.reset()
68
79
 
69
- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
80
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
70
81
  env = DeepCopyMCTSGymEnvWrapper(env)
71
82
 
72
83
  # 2. create the agent
@@ -89,7 +100,7 @@ if __name__ == '__main__':
89
100
 
90
101
  # 5. print the solution
91
102
  # read the solution from the info provided by the RecordEpisodeStatistics wrapper
92
- # (that NaiveSoloMCTSGymEnvWrapper uses internally)
103
+ # (that DeepCopyMCTSGymEnvWrapper uses internally)
93
104
  episode_length = info["episode"]["l"]
94
105
  episode_return = info["episode"]["r"]
95
106
 
@@ -182,7 +193,7 @@ if __name__ == '__main__':
182
193
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
183
194
  env.reset()
184
195
 
185
- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
196
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
186
197
  env = DeepCopyMCTSGymEnvWrapper(env)
187
198
 
188
199
  # 2. create the agent
@@ -211,7 +222,7 @@ if __name__ == '__main__':
211
222
  env.close()
212
223
 
213
224
  # 5. print the solution
214
- # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that NaiveSoloMCTSGymEnvWrapper wraps internally)
225
+ # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
215
226
  episode_length = info["episode"]["l"]
216
227
  episode_return = info["episode"]["r"]
217
228
 
@@ -252,13 +263,13 @@ import gymnasium as gym
252
263
  from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
253
264
  from jsp_instance_utils.instances import ft06, ft06_makespan
254
265
 
255
- from gymcts.gymcts_agent import SoloMCTSAgent
256
- from gymcts.gymcts_gym_env import SoloMCTSGymEnv
266
+ from gymcts.gymcts_agent import GymctsAgent
267
+ from gymcts.gymcts_env_abc import GymctsABC
257
268
 
258
269
  from gymcts.logger import log
259
270
 
260
271
 
261
- class GraphJspGYMCTSWrapper(SoloMCTSGymEnv, gym.Wrapper):
272
+ class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):
262
273
 
263
274
  def __init__(self, env: DisjunctiveGraphJspEnv):
264
275
  gym.Wrapper.__init__(self, env)
@@ -309,7 +320,7 @@ if __name__ == '__main__':
309
320
 
310
321
  env = GraphJspGYMCTSWrapper(env)
311
322
 
312
- agent = SoloMCTSAgent(
323
+ agent = GymctsAgent(
313
324
  env=env,
314
325
  clear_mcts_tree_after_step=True,
315
326
  render_tree_after_step=True,
@@ -352,7 +363,6 @@ import gymnasium as gym
352
363
 
353
364
  from gymcts.gymcts_agent import GymctsAgent
354
365
  from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
355
- from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
356
366
 
357
367
  from gymcts.logger import log
358
368
 
@@ -365,7 +375,7 @@ if __name__ == '__main__':
365
375
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
366
376
  env.reset()
367
377
 
368
- # wrap the environment with the naive wrapper or a custom gymcts wrapper
378
+ # wrap the environment with the wrapper or a custom gymcts wrapper
369
379
  env = ActionHistoryMCTSGymEnvWrapper(env)
370
380
 
371
381
  # create the agent
@@ -436,11 +446,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,
436
446
 
437
447
  using https:
438
448
  ```shell
439
- git clone https://github.com/Alexander-Nasuta/todo
449
+ git clone https://github.com/Alexander-Nasuta/gymcts.git
440
450
  ```
441
451
  or by using the GitHub CLI:
442
452
  ```shell
443
- gh repo clone Alexander-Nasuta/todo
453
+ gh repo clone Alexander-Nasuta/gymcts
444
454
  ```
445
455
 
446
456
  if you are using PyCharm, I recommend doing the following additional steps:
@@ -449,9 +459,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
449
459
  - mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
450
460
  - mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)
451
461
 
452
- at the end your project structure should look like this:
453
-
454
- todo
455
462
 
456
463
  ### Create a Virtual Environment (optional)
457
464
 
@@ -507,9 +514,6 @@ This project uses `pytest` for testing. To run the tests, run the following comm
507
514
  ```shell
508
515
  pytest
509
516
  ```
510
- Here is a screenshot of what the output might look like:
511
-
512
- ![](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv/raw/master/resources/pytest-screenshot.png)
513
517
 
514
518
  For testing with `tox` run the following command:
515
519
 
@@ -517,12 +521,6 @@ For testing with `tox` run the following command:
517
521
  tox
518
522
  ```
519
523
 
520
- Here is a screenshot of what the output might look like:
521
-
522
- ![](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv/raw/master/resources/tox-screenshot.png)
523
-
524
- Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
525
-
526
524
  ### Building and Publishing the Project to PyPi
527
525
 
528
526
  In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
@@ -561,7 +559,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
561
559
  This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).
562
560
 
563
561
 
564
-
565
562
  ## Contact
566
563
 
567
564
  If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on the repository.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "gymcts"
7
- version = "1.2.0"
7
+ version = "1.3.0"
8
8
  description = "A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems formulated as gymnasium reinforcement learning environments."
9
9
  readme = "README.md"
10
10
  authors = [{ name = "Alexander Nasuta", email = "alexander.nasuta@wzl-iqs.rwth-aachen.de" }]
@@ -32,7 +32,7 @@ examples = [
32
32
  ]
33
33
  dev = [
34
34
  "jsp-instance-utils",
35
- "graph-matrix-jsp-env",
35
+ "graph-matrix-jsp-env>=0.3.0",
36
36
  "graph-jsp-env",
37
37
  "JSSEnv",
38
38
 
@@ -49,13 +49,16 @@ dev = [
49
49
  "myst-parser", # .md support for sphinx
50
50
  "sphinx-autobuild",
51
51
  #
52
+ "sphinx-copybutton", # for code copy buttons
52
53
  "furo", # cool theme
53
54
  "twine",
54
55
  "sphinx-copybutton", # for code copy buttons
55
56
  "nbsphinx", # for jupyter notebook support in sphinx
57
+ "pandoc",
56
58
 
57
59
  "jupytext", # converting .py examples to jupyter notebook jupytext --to notebook *.py
58
60
  "jupyter", # for jupyter notebook kernel
61
+ "typing_extensions>=4.12.0",
59
62
  ]
60
63
 
61
64
  [project.urls]
@@ -25,9 +25,6 @@ testing =
25
25
  flake8>=3.9
26
26
  tox>=3.24
27
27
 
28
- [options.package_data]
29
- phantomderopfa = py.typed
30
-
31
28
  [flake8]
32
29
  max-line-length = 160
33
30
 
@@ -106,6 +106,18 @@ def wrap_with_color_codes(s: object, /, r: int | float, g: int | float, b: int |
106
106
 
107
107
 
108
108
  def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rainbow") -> str:
109
+ """
110
+ Wraps a string with a color scale (a matplotlib c_map) based on the n_of_item and n_classes.
111
+ This function is used to color code the available actions in the MCTS tree visualisation.
112
+ The children of the MCTS tree are colored based on their action for a clearer visualisation.
113
+
114
+ :param s: the string (or object) to be wrapped. objects are converted to string (using the __str__ function).
115
+ :param n_of_item: the index of the item to be colored. In a mcts tree, this is the (parent-)action of the node.
116
+ :param n_classes: the number of classes (or items) to be colored. In a mcts tree, this is the number of available actions.
117
+ :param c_map: the colormap to be used (default is 'rainbow').
118
+ The colormap can be any matplotlib colormap, e.g. 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
119
+ :return: a string that contains the color-codes (prefix and suffix) and the string s in between.
120
+ """
109
121
  if s is None or n_of_item is None or n_classes is None:
110
122
  return s
111
123
 
@@ -119,6 +131,16 @@ def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rain
119
131
 
120
132
 
121
133
  def wrap_with_color_scale(s: str, value: float, min_val: float, max_val: float, c_map=None) -> str:
134
+ """
135
+ Wraps a string with a color scale (a matplotlib c_map) based on the value, min_val, and max_val.
136
+
137
+ :param s: the string to be wrapped
138
+ :param value: the value to be mapped to a color
139
+ :param min_val: the minimum value of the scale
140
+ :param max_val: the maximum value of the scale
141
+ :param c_map: the colormap to be used (default is 'rainbow')
142
+ :return:
143
+ """
122
144
  if s is None or min_val is None or max_val is None or min_val >= max_val:
123
145
  return s
124
146
 
@@ -1,8 +1,7 @@
1
1
  import random
2
- import copy
3
2
 
4
3
  import numpy as np
5
- from typing import TypeVar, Any, SupportsFloat, Callable
4
+ from typing import Any, SupportsFloat, Callable
6
5
  import gymnasium as gym
7
6
  from gymnasium.core import WrapperActType, WrapperObsType
8
7
  from gymnasium.wrappers import RecordEpisodeStatistics
@@ -13,6 +12,21 @@ from gymcts.logger import log
13
12
 
14
13
 
15
14
  class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
15
+ """
16
+ A wrapper for gym environments that implements the GymctsABC interface.
17
+ It uses the action history as state representation.
18
+ Please note that this is not the most efficient way to implement the state representation.
19
+ It is supposed to be used to see if your use-case works well with the MCTS algorithm.
20
+ If it does, you can consider implementing all GymctsABC methods in a more efficient way.
21
+ The action history is a list of actions taken in the environment.
22
+ The state is represented as a list of actions taken in the environment.
23
+ The state is used to restore the environment using the load_state method.
24
+
25
+ It is supposed to be used to see if your use-case works well with the MCTS algorithm.
26
+ If it does, you can consider implementing all GymctsABC methods in a more efficient way.
27
+ """
28
+
29
+ # helper attributes for the wrapper
16
30
  _terminal_flag: bool = False
17
31
  _last_reward: SupportsFloat = 0
18
32
  _step_tuple: tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]] = None
@@ -25,6 +39,17 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
25
39
  action_mask_fn: str | Callable[[gym.Env], np.ndarray] | None = None,
26
40
  buffer_length: int = 100,
27
41
  ):
42
+ """
43
+ A wrapper for gym environments that implements the GymctsABC interface.
44
+ It uses the action history as state representation.
45
+ Please note that this is not the most efficient way to implement the state representation.
46
+ It is supposed to be used to see if your use-case works well with the MCTS algorithm.
47
+ If it does, you can consider implementing all GymctsABC methods in a more efficient way.
48
+
49
+ :param env: the environment to wrap
50
+ :param action_mask_fn: a function that takes the environment as input and returns a mask of valid actions
51
+ :param buffer_length: the length of the buffer for recording episodes for determining their rollout returns
52
+ """
28
53
  # wrap with RecordEpisodeStatistics if it is not already wrapped
29
54
  env = RecordEpisodeStatistics(env, buffer_length=buffer_length)
30
55
 
@@ -48,6 +73,17 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
48
73
  self._action_mask_fn = action_mask_fn
49
74
 
50
75
  def load_state(self, state: list[int]) -> None:
76
+ """
77
+ Loads the state of the environment. The state is a list of actions taken in the environment.
78
+
79
+ The environment is reset and all actions in the state are performed in order to restore the environment to the
80
+ same state.
81
+
82
+ This works only for deterministic environments!
83
+
84
+ :param state: the state to load
85
+ :return: None
86
+ """
51
87
  self.env.reset()
52
88
  self._wrapper_action_history = []
53
89
 
@@ -56,15 +92,30 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
56
92
  self._wrapper_action_history.append(action)
57
93
 
58
94
  def is_terminal(self) -> bool:
95
+ """
96
+ Returns True if the environment is in a terminal state, False otherwise.
97
+
98
+ :return:
99
+ """
59
100
  if not len(self.get_valid_actions()):
60
101
  return True
61
102
  else:
62
103
  return self._terminal_flag
63
104
 
64
105
  def action_masks(self) -> np.ndarray | None:
106
+ """
107
+ Returns the action masks for the environment. If the action_mask_fn is not set, it returns None.
108
+
109
+ :return:
110
+ """
65
111
  return self._action_mask_fn(self.env) if self._action_mask_fn is not None else None
66
112
 
67
113
  def get_valid_actions(self) -> list[int]:
114
+ """
115
+ Returns a list of valid actions for the current state of the environment.
116
+
117
+ :return: a list of valid actions
118
+ """
68
119
  if self._action_mask_fn is None:
69
120
  action_space: gym.spaces.Discrete = self.env.action_space # Type hinting
70
121
  return list(range(action_space.n))
@@ -72,6 +123,12 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
72
123
  return [i for i, mask in enumerate(self.action_masks()) if mask]
73
124
 
74
125
  def rollout(self) -> float:
126
+ """
127
+ Performs a random rollout from the current state of the environment and returns the return (sum of rewards)
128
+ of the rollout.
129
+
130
+ :return: the return of the rollout
131
+ """
75
132
  log.debug("performing rollout")
76
133
  # random rollout
77
134
  # perform random valid action util terminal
@@ -92,11 +149,24 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
92
149
  return episode_return
93
150
 
94
151
  def get_state(self) -> list[int]:
152
+ """
153
+ Returns the current state of the environment. The state is a list of actions taken in the environment,
154
+ namely all action that have been taken in the environment so far (since the last reset).
155
+
156
+ :return: a list of actions taken in the environment
157
+ """
158
+
95
159
  return self._wrapper_action_history.copy()
96
160
 
97
161
  def step(
98
162
  self, action: WrapperActType
99
163
  ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
164
+ """
165
+ Performs a step in the environment. It adds the action to the action history and updates the terminal flag.
166
+
167
+ :param action: action to perform in the environment
168
+ :return: the step tuple of the environment (obs, reward, terminated, truncated, info)
169
+ """
100
170
  step_tuple = self.env.step(action)
101
171
  self._wrapper_action_history.append(action)
102
172
  obs, reward, terminated, truncated, info = step_tuple