gymcts 1.4.1__tar.gz → 1.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {gymcts-1.4.1/src/gymcts.egg-info → gymcts-1.4.3}/PKG-INFO +1 -1
  2. {gymcts-1.4.1 → gymcts-1.4.3}/pyproject.toml +1 -1
  3. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_neural_agent.py +44 -61
  4. {gymcts-1.4.1 → gymcts-1.4.3/src/gymcts.egg-info}/PKG-INFO +1 -1
  5. {gymcts-1.4.1 → gymcts-1.4.3}/LICENSE +0 -0
  6. {gymcts-1.4.1 → gymcts-1.4.3}/MANIFEST.in +0 -0
  7. {gymcts-1.4.1 → gymcts-1.4.3}/README.md +0 -0
  8. {gymcts-1.4.1 → gymcts-1.4.3}/setup.cfg +0 -0
  9. {gymcts-1.4.1 → gymcts-1.4.3}/setup.py +0 -0
  10. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/__init__.py +0 -0
  11. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/colorful_console_utils.py +0 -0
  12. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_action_history_wrapper.py +0 -0
  13. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_agent.py +0 -0
  14. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_deepcopy_wrapper.py +0 -0
  15. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_distributed_agent.py +0 -0
  16. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_env_abc.py +0 -0
  17. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_node.py +0 -0
  18. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/gymcts_tree_plotter.py +0 -0
  19. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts/logger.py +0 -0
  20. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/SOURCES.txt +0 -0
  21. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/dependency_links.txt +0 -0
  22. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/not-zip-safe +0 -0
  23. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/requires.txt +0 -0
  24. {gymcts-1.4.1 → gymcts-1.4.3}/src/gymcts.egg-info/top_level.txt +0 -0
  25. {gymcts-1.4.1 → gymcts-1.4.3}/tests/test_graph_matrix_jsp_env.py +0 -0
  26. {gymcts-1.4.1 → gymcts-1.4.3}/tests/test_gymnasium_envs.py +0 -0
  27. {gymcts-1.4.1 → gymcts-1.4.3}/tests/test_number_of_visits.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gymcts
- Version: 1.4.1
+ Version: 1.4.3
  Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
  Author: Alexander Nasuta
  Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "gymcts"
- version = "1.4.1"
+ version = "1.4.3"
  description = "A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments."
  readme = "README.md"
  authors = [{ name = "Alexander Nasuta", email = "alexander.nasuta@wzl-iqs.rwth-aachen.de" }]
@@ -55,14 +55,11 @@ class GraphJspNeuralGYMCTSWrapper(GymctsABC, gym.Wrapper):
      def get_state(self) -> Any:
          return env.unwrapped.get_action_history()

-
      def action_masks(self) -> np.ndarray | None:
          """Return the action mask for the current state."""
          return self.env.unwrapped.valid_action_mask()


-
-
  class GymctsNeuralNode(GymctsNode):
      PUCT_v3_mu = 0.95

@@ -126,15 +123,15 @@ class GymctsNeuralNode(GymctsNode):
      score_variate: Literal[
          "PUCT_v0",
          "PUCT_v1",
-         "PUTC_v2",
-         "PUTC_v3",
-         "PUTC_v4",
-         "PUTC_v5",
-         "PUTC_v6",
-         "PUTC_v7",
-         "PUTC_v8",
-         "PUTC_v9",
-         "PUTC_v10",
+         "PUCT_v2",
+         "PUCT_v3",
+         "PUCT_v4",
+         "PUCT_v5",
+         "PUCT_v6",
+         "PUCT_v7",
+         "PUCT_v8",
+         "PUCT_v9",
+         "PUCT_v10",
          "MuZero_v0",
          "MuZero_v1",
      ] = "PUCT_v0"
@@ -146,13 +143,12 @@ class GymctsNeuralNode(GymctsNode):
              env_reference: GymctsABC,
              prior_selection_score: float,
              observation: np.ndarray | None = None,
-     ):
+     ):
          super().__init__(action, parent, env_reference)

          self._obs = observation
          self._selection_score_prior = prior_selection_score

-
      def tree_policy_score(self) -> float:
          # call the superclass (GymctsNode) for ucb_score
          c = GymctsNode.ubc_c
@@ -167,7 +163,6 @@ class GymctsNeuralNode(GymctsNode):
          b = GymctsNode.best_action_weight
          exploitation_term = 0.0 if self.visit_count == 0 else (1 - b) * self.mean_value + b * self.max_value

-
          if GymctsNeuralNode.score_variate == "PUCT_v0":
              return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
          elif GymctsNeuralNode.score_variate == "PUCT_v1":
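
For orientation, the PUCT_v0 branch shown in the hunk above scores a child as the exploitation term plus c * p(s,a) * sqrt(N(s)) / (1 + N(s,a)). A standalone sketch of that arithmetic, with purely illustrative numbers that do not come from this diff:

import math

# Illustrative values only; in the source they come from the node and GymctsNode.ubc_c.
c = 1.0                  # exploration constant
p_sa = 0.25              # prior probability of the action
n_s = 40                 # parent visit count
n_sa = 4                 # child visit count
exploitation_term = 0.6  # (1 - b) * mean_value + b * max_value in the source

# PUCT_v0 as written in tree_policy_score():
score = exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa)
print(round(score, 3))   # 0.916
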
@@ -200,11 +195,10 @@ class GymctsNeuralNode(GymctsNode):
              c2 = GymctsNeuralNode.MuZero_c2
              return exploitation_term + c * p_sa * math.sqrt(n_s) / (1 + n_sa) * (c1 + math.log((n_s + c2 + 1) / c2))

-
-         exploration_term = self._selection_score_prior * c * math.sqrt(math.log(self.parent.visit_count) / (self.visit_count)) if self.visit_count > 0 else float("inf")
+         exploration_term = self._selection_score_prior * c * math.sqrt(
+             math.log(self.parent.visit_count) / (self.visit_count)) if self.visit_count > 0 else float("inf")
          return self.mean_value + exploration_term

-
      def get_best_action(self) -> int:
          """
          Returns the best action of the node. The best action is the action with the highest score.
@@ -214,7 +208,6 @@ class GymctsNeuralNode(GymctsNode):
          """
          return max(self.children.values(), key=lambda child: child.max_value).action

-
      def __str__(self, colored=False, action_space_n=None) -> str:
          """
          Returns a string representation of the node. The string representation is used for visualisation purposes.
@@ -263,14 +256,13 @@ class GymctsNeuralNode(GymctsNode):
          root_node = self.get_root()
          mean_val = f"{self.mean_value:.2f}"

-
          return ((f"("
                   f"{p}a{e}={ccu.wrap_evenly_spaced_color(s=self.action, n_of_item=self.action, n_classes=action_space_n)}, "
                   f"{p}N{e}={colorful_value(self.visit_count)}, "
                   f"{p}Q_v{e}={ccu.wrap_with_color_scale(s=mean_val, value=self.mean_value, min_val=root_node.min_value, max_val=root_node.max_value)}, "
                   f"{p}best{e}={colorful_value(self.max_value)}") +
-                 (f", {p}{GymctsNeuralNode.score_variate}{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))
-
+                 (
+                     f", {p}{GymctsNeuralNode.score_variate}{e}={colorful_value(self.tree_policy_score())})" if not self.is_root() else ")"))


  class GymctsNeuralAgent(GymctsAgent):
@@ -282,15 +274,15 @@ class GymctsNeuralAgent(GymctsAgent):
              score_variate: Literal[
                  "PUCT_v0",
                  "PUCT_v1",
-                 "PUTC_v2",
-                 "PUTC_v3",
-                 "PUTC_v4",
-                 "PUTC_v5",
-                 "PUTC_v6",
-                 "PUTC_v7",
-                 "PUTC_v8",
-                 "PUTC_v9",
-                 "PUTC_v10",
+                 "PUCT_v2",
+                 "PUCT_v3",
+                 "PUCT_v4",
+                 "PUCT_v5",
+                 "PUCT_v6",
+                 "PUCT_v7",
+                 "PUCT_v8",
+                 "PUCT_v9",
+                 "PUCT_v10",
                  "MuZero_v0",
                  "MuZero_v1",
              ] = "PUCT_v0",
@@ -304,15 +296,23 @@ class GymctsNeuralAgent(GymctsAgent):
              **kwargs
          )
          if score_variate not in [
-             "PUCT_v0", "PUCT_v1", "PUTC_v2",
-             "PUTC_v3", "PUTC_v4", "PUTC_v5",
-             "PUTC_v6", "PUTC_v7", "PUTC_v8",
-             "PUTC_v9", "PUTC_v10",
-             "MuZero_v0", "MuZero_v1"
+             "PUCT_v0",
+             "PUCT_v1",
+             "PUCT_v2",
+             "PUCT_v3",
+             "PUCT_v4",
+             "PUCT_v5",
+             "PUCT_v6",
+             "PUCT_v7",
+             "PUCT_v8",
+             "PUCT_v9",
+             "PUCT_v10",
+             "MuZero_v0",
+             "MuZero_v1",
          ]:
              raise ValueError(f"Invalid score_variate: {score_variate}. Must be one of: "
-                              f"PUCT_v0, PUCT_v1, PUTC_v2, PUTC_v3, PUTC_v4, PUTC_v5, "
-                              f"PUTC_v6, PUTC_v7, PUTC_v8, PUTC_v9, PUTC_v10, MuZero_v0, MuZero_v1")
+                              f"['PUCT_v0', 'PUCT_v1', 'PUCT_v2', 'PUCT_v3', 'PUCT_v4', 'PUCT_v5', "
+                              f"'PUCT_v6', 'PUCT_v7', 'PUCT_v8', 'PUCT_v9', 'PUCT_v10', 'MuZero_v0', 'MuZero_v1']")
          GymctsNeuralNode.score_variate = score_variate

          if model_kwargs is None:
@@ -336,22 +336,17 @@ class GymctsNeuralAgent(GymctsAgent):
          env = ActionMasker(env, action_mask_fn=mask_fn)

          model_kwargs = {
-             "policy": MaskableActorCriticPolicy,
-             "env": env,
-             "verbose": 1,
-         } | model_kwargs
+             "policy": MaskableActorCriticPolicy,
+             "env": env,
+             "verbose": 1,
+         } | model_kwargs

          self._model = sb3_contrib.MaskablePPO(**model_kwargs)

-
-
-
-
-     def learn(self, total_timesteps:int, **kwargs) -> None:
+     def learn(self, total_timesteps: int, **kwargs) -> None:
          """Learn from the environment using the MaskablePPO model."""
          self._model.learn(total_timesteps=total_timesteps, **kwargs)

-
      def expand_node(self, node: GymctsNeuralNode) -> None:
          log.debug(f"expanding node: {node}")
          # EXPANSION STRATEGY
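
The model_kwargs merge in the hunk above uses Python's dict-union operator, so caller-supplied keys override the defaults ("policy", "env", "verbose": 1) before they are handed to sb3_contrib.MaskablePPO. A plain-dict illustration with placeholder values that are not taken from this diff:

# The right-hand operand of | wins on duplicate keys.
defaults = {"policy": "MaskableActorCriticPolicy", "env": "env", "verbose": 1}
model_kwargs = {"verbose": 0, "gamma": 0.99013}
merged = defaults | model_kwargs
print(merged)  # {'policy': 'MaskableActorCriticPolicy', 'env': 'env', 'verbose': 0, 'gamma': 0.99013}
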
@@ -395,7 +390,6 @@ class GymctsNeuralAgent(GymctsAgent):
              if prob == 0.0:
                  continue

-
              assert action in node.valid_actions, f"Action {action} is not in valid actions: {node.valid_actions}"

              obs, reward, terminal, truncated, _ = self.env.step(action)
@@ -411,9 +405,6 @@ class GymctsNeuralAgent(GymctsAgent):
          # print(f"Expanded node {node} with {len(node.children)} children.")


-
-
-
  if __name__ == '__main__':
      log.setLevel(20)

@@ -426,14 +417,13 @@ if __name__ == '__main__':
          "reward_function": "nasuta",
      }

-
-
      env = DisjunctiveGraphJspEnv(**env_kwargs)
      env.reset()

      env = GraphJspNeuralGYMCTSWrapper(env)

      import torch
+
      model_kwargs = {
          "gamma": 0.99013,
          "gae_lambda": 0.9,
@@ -467,7 +457,6 @@

      agent.learn(total_timesteps=10_000)

-
      agent.solve()

      actions = agent.solve(render_tree_after_step=True)
@@ -477,9 +466,3 @@ if __name__ == '__main__':
      env.render()
      makespan = env.unwrapped.get_makespan()
      print(f"makespan: {makespan}")
-
-
-
-
-
-
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gymcts
- Version: 1.4.1
+ Version: 1.4.3
  Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
  Author: Alexander Nasuta
  Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>