job-shop-lib 0.5.1__py3-none-any.whl → 1.0.0a1__py3-none-any.whl
- job_shop_lib/__init__.py +16 -8
- job_shop_lib/{base_solver.py → _base_solver.py} +1 -1
- job_shop_lib/{job_shop_instance.py → _job_shop_instance.py} +9 -4
- job_shop_lib/_operation.py +95 -0
- job_shop_lib/{schedule.py → _schedule.py} +73 -54
- job_shop_lib/{scheduled_operation.py → _scheduled_operation.py} +13 -37
- job_shop_lib/benchmarking/__init__.py +66 -43
- job_shop_lib/benchmarking/_load_benchmark.py +88 -0
- job_shop_lib/constraint_programming/__init__.py +13 -0
- job_shop_lib/{cp_sat/ortools_solver.py → constraint_programming/_ortools_solver.py} +57 -18
- job_shop_lib/dispatching/__init__.py +45 -41
- job_shop_lib/dispatching/{dispatcher.py → _dispatcher.py} +153 -80
- job_shop_lib/dispatching/_dispatcher_observer_config.py +54 -0
- job_shop_lib/dispatching/_factories.py +125 -0
- job_shop_lib/dispatching/{history_tracker.py → _history_observer.py} +4 -6
- job_shop_lib/dispatching/{pruning_functions.py → _ready_operation_filters.py} +6 -35
- job_shop_lib/dispatching/_unscheduled_operations_observer.py +69 -0
- job_shop_lib/dispatching/feature_observers/__init__.py +16 -10
- job_shop_lib/dispatching/feature_observers/{composite_feature_observer.py → _composite_feature_observer.py} +84 -2
- job_shop_lib/dispatching/feature_observers/{duration_observer.py → _duration_observer.py} +6 -17
- job_shop_lib/dispatching/feature_observers/{earliest_start_time_observer.py → _earliest_start_time_observer.py} +114 -35
- job_shop_lib/dispatching/feature_observers/{factory.py → _factory.py} +31 -5
- job_shop_lib/dispatching/feature_observers/{feature_observer.py → _feature_observer.py} +59 -16
- job_shop_lib/dispatching/feature_observers/_is_completed_observer.py +97 -0
- job_shop_lib/dispatching/feature_observers/_is_ready_observer.py +33 -0
- job_shop_lib/dispatching/feature_observers/{position_in_job_observer.py → _position_in_job_observer.py} +1 -8
- job_shop_lib/dispatching/feature_observers/{remaining_operations_observer.py → _remaining_operations_observer.py} +8 -26
- job_shop_lib/dispatching/rules/__init__.py +51 -0
- job_shop_lib/dispatching/rules/_dispatching_rule_factory.py +82 -0
- job_shop_lib/dispatching/{dispatching_rule_solver.py → rules/_dispatching_rule_solver.py} +44 -15
- job_shop_lib/dispatching/{dispatching_rules.py → rules/_dispatching_rules_functions.py} +74 -21
- job_shop_lib/dispatching/rules/_machine_chooser_factory.py +69 -0
- job_shop_lib/dispatching/rules/_utils.py +127 -0
- job_shop_lib/exceptions.py +18 -0
- job_shop_lib/generation/__init__.py +2 -2
- job_shop_lib/generation/{general_instance_generator.py → _general_instance_generator.py} +26 -7
- job_shop_lib/generation/{instance_generator.py → _instance_generator.py} +13 -3
- job_shop_lib/graphs/__init__.py +17 -6
- job_shop_lib/graphs/{job_shop_graph.py → _job_shop_graph.py} +81 -2
- job_shop_lib/graphs/{node.py → _node.py} +18 -12
- job_shop_lib/graphs/graph_updaters/__init__.py +13 -0
- job_shop_lib/graphs/graph_updaters/_graph_updater.py +59 -0
- job_shop_lib/graphs/graph_updaters/_residual_graph_updater.py +154 -0
- job_shop_lib/graphs/graph_updaters/_utils.py +25 -0
- job_shop_lib/reinforcement_learning/__init__.py +41 -0
- job_shop_lib/reinforcement_learning/_multi_job_shop_graph_env.py +366 -0
- job_shop_lib/reinforcement_learning/_reward_observers.py +85 -0
- job_shop_lib/reinforcement_learning/_single_job_shop_graph_env.py +337 -0
- job_shop_lib/reinforcement_learning/_types_and_constants.py +61 -0
- job_shop_lib/reinforcement_learning/_utils.py +96 -0
- job_shop_lib/visualization/__init__.py +20 -4
- job_shop_lib/visualization/{agent_task_graph.py → _agent_task_graph.py} +28 -9
- job_shop_lib/visualization/_gantt_chart_creator.py +219 -0
- job_shop_lib/visualization/_gantt_chart_video_and_gif_creation.py +388 -0
- {job_shop_lib-0.5.1.dist-info → job_shop_lib-1.0.0a1.dist-info}/METADATA +68 -44
- job_shop_lib-1.0.0a1.dist-info/RECORD +66 -0
- job_shop_lib/benchmarking/load_benchmark.py +0 -142
- job_shop_lib/cp_sat/__init__.py +0 -5
- job_shop_lib/dispatching/factories.py +0 -206
- job_shop_lib/dispatching/feature_observers/is_completed_observer.py +0 -98
- job_shop_lib/dispatching/feature_observers/is_ready_observer.py +0 -40
- job_shop_lib/generators/__init__.py +0 -8
- job_shop_lib/generators/basic_generator.py +0 -200
- job_shop_lib/generators/transformations.py +0 -164
- job_shop_lib/operation.py +0 -122
- job_shop_lib/visualization/create_gif.py +0 -209
- job_shop_lib-0.5.1.dist-info/RECORD +0 -52
- /job_shop_lib/dispatching/feature_observers/{is_scheduled_observer.py → _is_scheduled_observer.py} +0 -0
- /job_shop_lib/generation/{transformations.py → _transformations.py} +0 -0
- /job_shop_lib/graphs/{build_agent_task_graph.py → _build_agent_task_graph.py} +0 -0
- /job_shop_lib/graphs/{build_disjunctive_graph.py → _build_disjunctive_graph.py} +0 -0
- /job_shop_lib/graphs/{constants.py → _constants.py} +0 -0
- /job_shop_lib/visualization/{disjunctive_graph.py → _disjunctive_graph.py} +0 -0
- /job_shop_lib/visualization/{gantt_chart.py → _gantt_chart.py} +0 -0
- {job_shop_lib-0.5.1.dist-info → job_shop_lib-1.0.0a1.dist-info}/LICENSE +0 -0
- {job_shop_lib-0.5.1.dist-info → job_shop_lib-1.0.0a1.dist-info}/WHEEL +0 -0
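Most of the changes above are a package-wide reorganization rather than new behavior: implementation modules gain a leading underscore (making them private) and their public names are re-exported from each package's __init__.py, cp_sat becomes constraint_programming, generators becomes generation, and the dispatching rules move into job_shop_lib.dispatching.rules. As a hedged sketch of what this means for user code (the re-exported names below are inferred from the file list, not verified against the wheel):

    # Illustrative only: import paths inferred from the renames listed above.
    # 0.5.1-style imports targeted concrete modules, e.g.:
    #     from job_shop_lib.cp_sat.ortools_solver import ORToolsSolver
    # In 1.0.0a1 those modules are private (_ortools_solver.py), so the
    # public names are imported from the packages instead:
    from job_shop_lib.constraint_programming import ORToolsSolver
    from job_shop_lib.dispatching.rules import DispatchingRuleSolver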
job_shop_lib/reinforcement_learning/_multi_job_shop_graph_env.py
@@ -0,0 +1,366 @@
+"""Home of the `MultiJobShopGraphEnv` class."""
+
+from collections import defaultdict
+from collections.abc import Callable, Sequence
+from typing import Any
+from copy import deepcopy
+
+import gymnasium as gym
+import numpy as np
+
+from job_shop_lib import JobShopInstance, Operation
+from job_shop_lib.dispatching import (
+    Dispatcher,
+    filter_dominated_operations,
+    DispatcherObserverConfig,
+)
+from job_shop_lib.dispatching.feature_observers import FeatureObserverConfig
+from job_shop_lib.generation import InstanceGenerator
+from job_shop_lib.graphs import JobShopGraph, build_agent_task_graph
+from job_shop_lib.graphs.graph_updaters import (
+    GraphUpdater,
+    ResidualGraphUpdater,
+)
+from job_shop_lib.reinforcement_learning import (
+    SingleJobShopGraphEnv,
+    RewardObserver,
+    RenderConfig,
+    MakespanReward,
+    ObservationDict,
+    ObservationSpaceKey,
+    add_padding,
+)
+
+
+class MultiJobShopGraphEnv(gym.Env):
+    """Gymnasium environment for solving multiple Job Shop Scheduling Problems
+    using reinforcement learning and Graph Neural Networks.
+
+    This environment generates a new Job Shop Scheduling Problem instance
+    for each reset, creates a graph representation, and manages the scheduling
+    process using a :class:`~job_shop_lib.dispatching.Dispatcher`.
+
+    The observation space includes:
+
+    - removed_nodes: Binary vector indicating removed nodes.
+    - edge_index: Edge list in COO format.
+    - operations: Matrix of operation features.
+    - jobs: Matrix of job features (if applicable).
+    - machines: Matrix of machine features (if applicable).
+
+    Internally, the class creates a
+    :class:`~job_shop_lib.reinforcement_learning.SingleJobShopGraphEnv`
+    environment to manage the scheduling process for each
+    :class:`~job_shop_lib.JobShopInstance`.
+
+    Attributes:
+        instance_generator:
+            A :class:`~job_shop_lib.generation.InstanceGenerator` that
+            generates a new problem instance on each reset.
+        action_space:
+            :class:`gymnasium.spaces.Discrete` action space with size equal to
+            the maximum number of jobs.
+        observation_space:
+            Dictionary of observation spaces. Keys are defined in
+            :class:`~job_shop_lib.reinforcement_learning.ObservationSpaceKey`.
+        single_job_shop_graph_env:
+            Environment for a specific Job Shop Scheduling Problem instance.
+            See :class:`SingleJobShopGraphEnv`.
+        graph_initializer:
+            Function to create the initial graph representation. It should
+            take a :class:`~job_shop_lib.JobShopInstance` as input and return
+            a :class:`~job_shop_lib.graphs.JobShopGraph`.
+        render_mode:
+            Rendering mode for visualization. Supported modes are:
+            - human: Renders the current Gantt chart.
+            - save_video: Saves a video of the Gantt chart. Used only if the
+              schedule is completed.
+            - save_gif: Saves a GIF of the Gantt chart. Used only if the
+              schedule is completed.
+        render_config:
+            Configuration for rendering. See
+            :class:`~job_shop_lib.reinforcement_learning.RenderConfig`.
+        feature_observer_configs:
+            List of :class:`~job_shop_lib.dispatching.DispatcherObserverConfig`
+            for feature observers.
+        reward_function_config:
+            Configuration for the reward function. See
+            :class:`~job_shop_lib.dispatching.DispatcherObserverConfig` and
+            :class:`~job_shop_lib.reinforcement_learning.RewardObserver`.
+        graph_updater_config:
+            Configuration for the graph updater. The graph updater is used to
+            update the graph representation after each action. See
+            :class:`~job_shop_lib.dispatching.DispatcherObserverConfig` and
+            :class:`~job_shop_lib.graphs.graph_updaters.GraphUpdater`.
+    """
+
+    def __init__(
+        self,
+        instance_generator: InstanceGenerator,
+        feature_observer_configs: Sequence[FeatureObserverConfig],
+        graph_initializer: Callable[
+            [JobShopInstance], JobShopGraph
+        ] = build_agent_task_graph,
+        graph_updater_config: DispatcherObserverConfig[
+            type[GraphUpdater]
+        ] = DispatcherObserverConfig(class_type=ResidualGraphUpdater),
+        ready_operations_filter: Callable[
+            [Dispatcher, list[Operation]], list[Operation]
+        ] = filter_dominated_operations,
+        reward_function_config: DispatcherObserverConfig[
+            type[RewardObserver]
+        ] = DispatcherObserverConfig(class_type=MakespanReward),
+        render_mode: str | None = None,
+        render_config: RenderConfig | None = None,
+        use_padding: bool = True,
+    ) -> None:
+        """Initializes the environment.
+
+        Args:
+            instance_generator:
+                A :class:`~job_shop_lib.generation.InstanceGenerator` that
+                generates a new problem instance on each reset.
+            feature_observer_configs:
+                Configurations for feature observers. Each configuration
+                should be a
+                :class:`~job_shop_lib.dispatching.DispatcherObserverConfig`
+                with a class type that inherits from
+                :class:`~job_shop_lib.dispatching.feature_observers.FeatureObserver`
+                or a string or enum that represents a built-in feature
+                observer.
+            graph_initializer:
+                Function to create the initial graph representation.
+                If ``None``, the default graph initializer is used:
+                :func:`~job_shop_lib.graphs.build_agent_task_graph`.
+            graph_updater_config:
+                Configuration for the graph updater. The graph updater is used
+                to update the graph representation after each action. If
+                ``None``, the default graph updater is used:
+                :class:`~job_shop_lib.graphs.graph_updaters.ResidualGraphUpdater`.
+            ready_operations_filter:
+                Function to filter ready operations. If ``None``, the default
+                filter is used:
+                :func:`~job_shop_lib.dispatching.filter_dominated_operations`.
+            reward_function_config:
+                Configuration for the reward function. If ``None``, the
+                default reward function is used:
+                :class:`~job_shop_lib.reinforcement_learning.MakespanReward`.
+            render_mode:
+                Rendering mode for visualization. Supported modes are:
+                - human: Renders the current Gantt chart.
+                - save_video: Saves a video of the Gantt chart. Used only if
+                  the schedule is completed.
+                - save_gif: Saves a GIF of the Gantt chart. Used only if the
+                  schedule is completed.
+            render_config:
+                Configuration for rendering. See
+                :class:`~job_shop_lib.reinforcement_learning.RenderConfig`.
+            use_padding:
+                Whether to use padding in observations. If ``True``, all
+                matrices are padded to fixed sizes based on the maximum
+                instance size. Values are padded with -1, except for the
+                "removed_nodes" key, which is padded with ``True``,
+                indicating that the node is removed.
+        """
+        super().__init__()
+
+        # Create an instance with the maximum size
+        instance_with_max_size = instance_generator.generate(
+            num_jobs=instance_generator.max_num_jobs,
+            num_machines=instance_generator.max_num_machines,
+        )
+        graph = graph_initializer(instance_with_max_size)
+
+        self.single_job_shop_graph_env = SingleJobShopGraphEnv(
+            job_shop_graph=graph,
+            feature_observer_configs=feature_observer_configs,
+            reward_function_config=reward_function_config,
+            graph_updater_config=graph_updater_config,
+            ready_operations_filter=ready_operations_filter,
+            render_mode=render_mode,
+            render_config=render_config,
+            use_padding=use_padding,
+        )
+        self.instance_generator = instance_generator
+        self.graph_initializer = graph_initializer
+        self.render_mode = render_mode
+        self.render_config = render_config
+        self.feature_observer_configs = feature_observer_configs
+        self.reward_function_config = reward_function_config
+        self.graph_updater_config = graph_updater_config
+
+        self.action_space = deepcopy(
+            self.single_job_shop_graph_env.action_space
+        )
+        self.observation_space: gym.spaces.Dict = deepcopy(
+            self.single_job_shop_graph_env.observation_space
+        )
+
+    @property
+    def dispatcher(self) -> Dispatcher:
+        """Returns the current dispatcher instance."""
+        return self.single_job_shop_graph_env.dispatcher
+
+    @property
+    def reward_function(self) -> RewardObserver:
+        """Returns the current reward function instance."""
+        return self.single_job_shop_graph_env.reward_function
+
+    @reward_function.setter
+    def reward_function(self, reward_function: RewardObserver) -> None:
+        """Sets the reward function instance."""
+        self.single_job_shop_graph_env.reward_function = reward_function
+
+    @property
+    def ready_operations_filter(
+        self,
+    ) -> Callable[[Dispatcher, list[Operation]], list[Operation]] | None:
+        """Returns the current ready operations filter."""
+        return (
+            self.single_job_shop_graph_env.dispatcher.ready_operations_filter
+        )
+
+    @ready_operations_filter.setter
+    def ready_operations_filter(
+        self,
+        pruning_function: Callable[
+            [Dispatcher, list[Operation]], list[Operation]
+        ],
+    ) -> None:
+        """Sets the ready operations filter."""
+        self.single_job_shop_graph_env.dispatcher.ready_operations_filter = (
+            pruning_function
+        )
+
+    @property
+    def use_padding(self) -> bool:
+        """Returns whether the padding is used."""
+        return self.single_job_shop_graph_env.use_padding
+
+    @use_padding.setter
+    def use_padding(self, use_padding: bool) -> None:
+        """Sets whether the padding is used."""
+        self.single_job_shop_graph_env.use_padding = use_padding
+
+    @property
+    def job_shop_graph(self) -> JobShopGraph:
+        """Returns the current job shop graph."""
+        return self.single_job_shop_graph_env.job_shop_graph
+
+    @property
+    def instance(self) -> JobShopInstance:
+        """Returns the current job shop instance."""
+        return self.single_job_shop_graph_env.instance
+
+    def reset(
+        self,
+        *,
+        seed: int | None = None,
+        options: dict[str, Any] | None = None,
+    ) -> tuple[ObservationDict, dict]:
+        """Resets the environment and returns the initial observation.
+
+        Args:
+            seed: Random seed for reproducibility.
+            options: Additional options for reset (currently unused).
+
+        Returns:
+            A tuple containing:
+            - ObservationDict: The initial observation of the environment.
+            - dict: An info dictionary containing additional information about
+              the reset state. This may include details about the generated
+              instance or initial graph structure.
+        """
+        instance = self.instance_generator.generate()
+        graph = self.graph_initializer(instance)
+        self.single_job_shop_graph_env = SingleJobShopGraphEnv(
+            job_shop_graph=graph,
+            feature_observer_configs=self.feature_observer_configs,
+            reward_function_config=self.reward_function_config,
+            ready_operations_filter=self.ready_operations_filter,
+            render_mode=self.render_mode,
+            render_config=self.render_config,
+            use_padding=self.single_job_shop_graph_env.use_padding,
+        )
+        obs, info = self.single_job_shop_graph_env.reset(
+            seed=seed, options=options
+        )
+        if self.use_padding:
+            obs = self._add_padding_to_observation(obs)
+
+        return obs, info
+
+    def step(
+        self, action: tuple[int, int]
+    ) -> tuple[ObservationDict, float, bool, bool, dict]:
+        """Takes a step in the environment.
+
+        Args:
+            action:
+                The action to take. The action is a tuple of two integers
+                (job_id, machine_id): the job ID and the machine ID in which
+                to schedule the operation.
+
+        Returns:
+            A tuple containing the following elements:
+            - The observation of the environment.
+            - The reward obtained.
+            - Whether the environment is done.
+            - Whether the episode was truncated (always False).
+            - A dictionary with additional information. The dictionary
+              contains the following keys:
+                - "feature_names": The names of the features in the
+                  observation.
+                - "available_operations": The operations that are ready to be
+                  scheduled.
+        """
+        obs, reward, done, truncated, info = (
+            self.single_job_shop_graph_env.step(action)
+        )
+        if self.use_padding:
+            obs = self._add_padding_to_observation(obs)
+
+        return obs, reward, done, truncated, info
+
+    def _add_padding_to_observation(
+        self, observation: ObservationDict
+    ) -> ObservationDict:
+        """Adds padding to the observation.
+
+        "removed_nodes":
+            input_shape: (num_nodes,)
+            output_shape: (max_num_nodes,) (padded with True)
+        "edge_index":
+            input_shape: (2, num_edges)
+            output_shape: (2, max_num_edges) (padded with -1)
+        "operations":
+            input_shape: (num_operations, num_features)
+            output_shape: (max_num_operations, num_features) (padded with -1)
+        "jobs":
+            input_shape: (num_jobs, num_features)
+            output_shape: (max_num_jobs, num_features) (padded with -1)
+        "machines":
+            input_shape: (num_machines, num_features)
+            output_shape: (max_num_machines, num_features) (padded with -1)
+        """
+        padding_value: dict[str, float | bool] = defaultdict(lambda: -1)
+        padding_value[ObservationSpaceKey.REMOVED_NODES.value] = True
+        for key, value in observation.items():
+            if not isinstance(value, np.ndarray):  # Make mypy happy
+                continue
+            expected_shape = self._get_output_shape(key)
+            observation[key] = add_padding(  # type: ignore[literal-required]
+                value,
+                expected_shape,
+                padding_value=padding_value[key],
+            )
+        return observation
+
+    def _get_output_shape(self, key: str) -> tuple[int, ...]:
+        """Returns the output shape of the observation space key."""
+        output_shape = self.observation_space[key].shape
+        assert output_shape is not None  # Make mypy happy
+        return output_shape
+
+    def render(self) -> None:
+        self.single_job_shop_graph_env.render()
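To make the control flow above concrete, here is a minimal usage sketch (not part of the diff). The generator parameters and the string-based feature observer names are assumptions inferred from the generation and feature_observers modules in the file list; the policy below simply schedules the first available operation.

    # Illustrative usage sketch; names marked "assumed" are not verified
    # against the wheel.
    from job_shop_lib.generation import GeneralInstanceGenerator  # assumed
    from job_shop_lib.reinforcement_learning import MultiJobShopGraphEnv

    # Instances are sampled with 3-5 jobs and 3-4 machines on each reset.
    generator = GeneralInstanceGenerator(
        num_jobs=(3, 5), num_machines=(3, 4), seed=42  # assumed parameters
    )
    env = MultiJobShopGraphEnv(
        instance_generator=generator,
        # Strings are resolved to built-in observers (assumed names).
        feature_observer_configs=["duration", "is_ready"],
        use_padding=True,  # pad matrices to the maximum instance size
    )

    obs, info = env.reset(seed=42)
    done = False
    total_reward = 0.0
    while not done:
        # Assumes the info dict from reset exposes "available_operations"
        # just as the step docstring documents for step.
        operation = info["available_operations"][0]
        action = (operation.job_id, operation.machine_id)
        obs, reward, done, _truncated, info = env.step(action)
        total_reward += reward
    # With the default MakespanReward, the per-step rewards telescope, so
    # total_reward equals the negative final makespan.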
job_shop_lib/reinforcement_learning/_reward_observers.py
@@ -0,0 +1,85 @@
+"""Reward functions are defined as `DispatcherObserver`s and are used to
+calculate the reward for a given state."""
+
+from job_shop_lib.dispatching import DispatcherObserver, Dispatcher
+from job_shop_lib import ScheduledOperation
+
+
+class RewardObserver(DispatcherObserver):
+    """Base class for all reward functions.
+
+    Attributes:
+        rewards:
+            List of rewards calculated for each operation scheduled by the
+            dispatcher.
+    """
+
+    def __init__(
+        self, dispatcher: Dispatcher, *, subscribe: bool = True
+    ) -> None:
+        super().__init__(dispatcher, subscribe=subscribe)
+        self.rewards: list[float] = []
+
+    @property
+    def last_reward(self) -> float:
+        """Returns the reward of the last step, or 0 if no rewards have been
+        calculated."""
+        return self.rewards[-1] if self.rewards else 0
+
+    def reset(self) -> None:
+        """Sets the rewards attribute to a new empty list."""
+        self.rewards = []
+
+
+class MakespanReward(RewardObserver):
+    """Dense reward function based on the negative makespan of the schedule.
+
+    The reward is calculated as the difference between the makespan of the
+    schedule before and after the last operation was scheduled. The makespan
+    is the time at which the last operation is completed.
+
+    Attributes:
+        current_makespan:
+            Makespan of the schedule after the last operation was scheduled.
+    """
+
+    def __init__(
+        self, dispatcher: Dispatcher, *, subscribe: bool = True
+    ) -> None:
+        super().__init__(dispatcher, subscribe=subscribe)
+        self.current_makespan = dispatcher.schedule.makespan()
+
+    def reset(self) -> None:
+        super().reset()
+        self.current_makespan = self.dispatcher.schedule.makespan()
+
+    def update(self, scheduled_operation: ScheduledOperation):
+        last_makespan = self.current_makespan
+        self.current_makespan = max(
+            last_makespan, scheduled_operation.end_time
+        )
+        reward = last_makespan - self.current_makespan
+        self.rewards.append(reward)
+
+
+class IdleTimeReward(RewardObserver):
+    """Dense reward function based on the negative idle time of the schedule.
+
+    The reward is the negative idle time incurred by the last scheduled
+    operation: the gap between the end of the previous operation on the same
+    machine (or time zero if there is none) and the start of the new one.
+    """
+
+    def update(self, scheduled_operation: ScheduledOperation):
+        machine_id = scheduled_operation.machine_id
+        machine_schedule = self.dispatcher.schedule.schedule[machine_id][:-1]
+
+        if machine_schedule:
+            last_operation = machine_schedule[-1]
+            idle_time = (
+                scheduled_operation.start_time - last_operation.end_time
+            )
+        else:
+            idle_time = scheduled_operation.start_time
+
+        reward = -idle_time
+        self.rewards.append(reward)
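Because RewardObserver only requires an update method, a custom reward is a small subclass. Below is a minimal sketch of a sparse variant (not part of the diff); Schedule.is_complete and load_benchmark_instance are assumed names, inferred from the _schedule.py and benchmarking/_load_benchmark.py entries in the file list.

    # Hedged sketch of a custom reward observer (illustrative only).
    from job_shop_lib import ScheduledOperation
    from job_shop_lib.benchmarking import load_benchmark_instance  # assumed
    from job_shop_lib.dispatching import Dispatcher
    from job_shop_lib.reinforcement_learning import RewardObserver


    class SparseMakespanReward(RewardObserver):
        """Emits 0 until the schedule completes, then the negative makespan.

        Mirrors the dense MakespanReward above, but defers the whole signal
        to the final step.
        """

        def update(self, scheduled_operation: ScheduledOperation) -> None:
            # Schedule.is_complete() is assumed to report whether every
            # operation has been scheduled.
            if self.dispatcher.schedule.is_complete():
                self.rewards.append(-self.dispatcher.schedule.makespan())
            else:
                self.rewards.append(0)


    instance = load_benchmark_instance("ft06")
    dispatcher = Dispatcher(instance)
    reward_observer = SparseMakespanReward(dispatcher)  # subscribes on init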