job-shop-lib 0.5.1__py3-none-any.whl → 1.0.0a1__py3-none-any.whl
- job_shop_lib/__init__.py +16 -8
- job_shop_lib/{base_solver.py → _base_solver.py} +1 -1
- job_shop_lib/{job_shop_instance.py → _job_shop_instance.py} +9 -4
- job_shop_lib/_operation.py +95 -0
- job_shop_lib/{schedule.py → _schedule.py} +73 -54
- job_shop_lib/{scheduled_operation.py → _scheduled_operation.py} +13 -37
- job_shop_lib/benchmarking/__init__.py +66 -43
- job_shop_lib/benchmarking/_load_benchmark.py +88 -0
- job_shop_lib/constraint_programming/__init__.py +13 -0
- job_shop_lib/{cp_sat/ortools_solver.py → constraint_programming/_ortools_solver.py} +57 -18
- job_shop_lib/dispatching/__init__.py +45 -41
- job_shop_lib/dispatching/{dispatcher.py → _dispatcher.py} +153 -80
- job_shop_lib/dispatching/_dispatcher_observer_config.py +54 -0
- job_shop_lib/dispatching/_factories.py +125 -0
- job_shop_lib/dispatching/{history_tracker.py → _history_observer.py} +4 -6
- job_shop_lib/dispatching/{pruning_functions.py → _ready_operation_filters.py} +6 -35
- job_shop_lib/dispatching/_unscheduled_operations_observer.py +69 -0
- job_shop_lib/dispatching/feature_observers/__init__.py +16 -10
- job_shop_lib/dispatching/feature_observers/{composite_feature_observer.py → _composite_feature_observer.py} +84 -2
- job_shop_lib/dispatching/feature_observers/{duration_observer.py → _duration_observer.py} +6 -17
- job_shop_lib/dispatching/feature_observers/{earliest_start_time_observer.py → _earliest_start_time_observer.py} +114 -35
- job_shop_lib/dispatching/feature_observers/{factory.py → _factory.py} +31 -5
- job_shop_lib/dispatching/feature_observers/{feature_observer.py → _feature_observer.py} +59 -16
- job_shop_lib/dispatching/feature_observers/_is_completed_observer.py +97 -0
- job_shop_lib/dispatching/feature_observers/_is_ready_observer.py +33 -0
- job_shop_lib/dispatching/feature_observers/{position_in_job_observer.py → _position_in_job_observer.py} +1 -8
- job_shop_lib/dispatching/feature_observers/{remaining_operations_observer.py → _remaining_operations_observer.py} +8 -26
- job_shop_lib/dispatching/rules/__init__.py +51 -0
- job_shop_lib/dispatching/rules/_dispatching_rule_factory.py +82 -0
- job_shop_lib/dispatching/{dispatching_rule_solver.py → rules/_dispatching_rule_solver.py} +44 -15
- job_shop_lib/dispatching/{dispatching_rules.py → rules/_dispatching_rules_functions.py} +74 -21
- job_shop_lib/dispatching/rules/_machine_chooser_factory.py +69 -0
- job_shop_lib/dispatching/rules/_utils.py +127 -0
- job_shop_lib/exceptions.py +18 -0
- job_shop_lib/generation/__init__.py +2 -2
- job_shop_lib/generation/{general_instance_generator.py → _general_instance_generator.py} +26 -7
- job_shop_lib/generation/{instance_generator.py → _instance_generator.py} +13 -3
- job_shop_lib/graphs/__init__.py +17 -6
- job_shop_lib/graphs/{job_shop_graph.py → _job_shop_graph.py} +81 -2
- job_shop_lib/graphs/{node.py → _node.py} +18 -12
- job_shop_lib/graphs/graph_updaters/__init__.py +13 -0
- job_shop_lib/graphs/graph_updaters/_graph_updater.py +59 -0
- job_shop_lib/graphs/graph_updaters/_residual_graph_updater.py +154 -0
- job_shop_lib/graphs/graph_updaters/_utils.py +25 -0
- job_shop_lib/reinforcement_learning/__init__.py +41 -0
- job_shop_lib/reinforcement_learning/_multi_job_shop_graph_env.py +366 -0
- job_shop_lib/reinforcement_learning/_reward_observers.py +85 -0
- job_shop_lib/reinforcement_learning/_single_job_shop_graph_env.py +337 -0
- job_shop_lib/reinforcement_learning/_types_and_constants.py +61 -0
- job_shop_lib/reinforcement_learning/_utils.py +96 -0
- job_shop_lib/visualization/__init__.py +20 -4
- job_shop_lib/visualization/{agent_task_graph.py → _agent_task_graph.py} +28 -9
- job_shop_lib/visualization/_gantt_chart_creator.py +219 -0
- job_shop_lib/visualization/_gantt_chart_video_and_gif_creation.py +388 -0
- {job_shop_lib-0.5.1.dist-info → job_shop_lib-1.0.0a1.dist-info}/METADATA +68 -44
- job_shop_lib-1.0.0a1.dist-info/RECORD +66 -0
- job_shop_lib/benchmarking/load_benchmark.py +0 -142
- job_shop_lib/cp_sat/__init__.py +0 -5
- job_shop_lib/dispatching/factories.py +0 -206
- job_shop_lib/dispatching/feature_observers/is_completed_observer.py +0 -98
- job_shop_lib/dispatching/feature_observers/is_ready_observer.py +0 -40
- job_shop_lib/generators/__init__.py +0 -8
- job_shop_lib/generators/basic_generator.py +0 -200
- job_shop_lib/generators/transformations.py +0 -164
- job_shop_lib/operation.py +0 -122
- job_shop_lib/visualization/create_gif.py +0 -209
- job_shop_lib-0.5.1.dist-info/RECORD +0 -52
- /job_shop_lib/dispatching/feature_observers/{is_scheduled_observer.py → _is_scheduled_observer.py} +0 -0
- /job_shop_lib/generation/{transformations.py → _transformations.py} +0 -0
- /job_shop_lib/graphs/{build_agent_task_graph.py → _build_agent_task_graph.py} +0 -0
- /job_shop_lib/graphs/{build_disjunctive_graph.py → _build_disjunctive_graph.py} +0 -0
- /job_shop_lib/graphs/{constants.py → _constants.py} +0 -0
- /job_shop_lib/visualization/{disjunctive_graph.py → _disjunctive_graph.py} +0 -0
- /job_shop_lib/visualization/{gantt_chart.py → _gantt_chart.py} +0 -0
- {job_shop_lib-0.5.1.dist-info → job_shop_lib-1.0.0a1.dist-info}/LICENSE +0 -0
- {job_shop_lib-0.5.1.dist-info → job_shop_lib-1.0.0a1.dist-info}/WHEEL +0 -0
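Most of the changes above are a package-wide reorganization rather than new behavior: implementation modules gain a leading underscore (making them private) and their public names are re-exported from each package's __init__.py, cp_sat becomes constraint_programming, generators becomes generation, and the dispatching rules move into job_shop_lib.dispatching.rules. As a hedged sketch of what this means for user code (the re-exported names below are inferred from the file list, not verified against the wheel):

    # Illustrative only: import paths inferred from the renames listed above.
    # 0.5.1-style imports targeted concrete modules, e.g.:
    #     from job_shop_lib.cp_sat.ortools_solver import ORToolsSolver
    # In 1.0.0a1 those modules are private (_ortools_solver.py), so the
    # public names are imported from the packages instead:
    from job_shop_lib.constraint_programming import ORToolsSolver
    from job_shop_lib.dispatching.rules import DispatchingRuleSolver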
job_shop_lib/reinforcement_learning/_multi_job_shop_graph_env.py
@@ -0,0 +1,366 @@
+"""Home of the `MultiJobShopGraphEnv` class."""
+
+from collections import defaultdict
+from collections.abc import Callable, Sequence
+from typing import Any
+from copy import deepcopy
+
+import gymnasium as gym
+import numpy as np
+
+from job_shop_lib import JobShopInstance, Operation
+from job_shop_lib.dispatching import (
+    Dispatcher,
+    filter_dominated_operations,
+    DispatcherObserverConfig,
+)
+from job_shop_lib.dispatching.feature_observers import FeatureObserverConfig
+from job_shop_lib.generation import InstanceGenerator
+from job_shop_lib.graphs import JobShopGraph, build_agent_task_graph
+from job_shop_lib.graphs.graph_updaters import (
+    GraphUpdater,
+    ResidualGraphUpdater,
+)
+from job_shop_lib.reinforcement_learning import (
+    SingleJobShopGraphEnv,
+    RewardObserver,
+    RenderConfig,
+    MakespanReward,
+    ObservationDict,
+    ObservationSpaceKey,
+    add_padding,
+)
+
+
+class MultiJobShopGraphEnv(gym.Env):
+    """Gymnasium environment for solving multiple Job Shop Scheduling Problems
+    using reinforcement learning and Graph Neural Networks.
+
+    This environment generates a new Job Shop Scheduling Problem instance
+    for each reset, creates a graph representation, and manages the scheduling
+    process using a :class:`~job_shop_lib.dispatching.Dispatcher`.
+
+    The observation space includes:
+
+    - removed_nodes: Binary vector indicating removed nodes.
+    - edge_index: Edge list in COO format.
+    - operations: Matrix of operation features.
+    - jobs: Matrix of job features (if applicable).
+    - machines: Matrix of machine features (if applicable).
+
+    Internally, the class creates a
+    :class:`~job_shop_lib.reinforcement_learning.SingleJobShopGraphEnv`
+    environment to manage the scheduling process for each
+    :class:`~job_shop_lib.JobShopInstance`.
+
+    Attributes:
+        instance_generator:
+            A :class:`~job_shop_lib.generation.InstanceGenerator` that
+            generates a new problem instance on each reset.
+        action_space:
+            :class:`gymnasium.spaces.Discrete` action space with size equal to
+            the maximum number of jobs.
+        observation_space:
+            Dictionary of observation spaces. Keys are defined in
+            :class:`~job_shop_lib.reinforcement_learning.ObservationSpaceKey`.
+        single_job_shop_graph_env:
+            Environment for a specific Job Shop Scheduling Problem instance.
+            See :class:`SingleJobShopGraphEnv`.
+        graph_initializer:
+            Function to create the initial graph representation. It should
+            take a :class:`~job_shop_lib.JobShopInstance` as input and return
+            a :class:`~job_shop_lib.graphs.JobShopGraph`.
+        render_mode:
+            Rendering mode for visualization. Supported modes are:
+            - human: Renders the current Gantt chart.
+            - save_video: Saves a video of the Gantt chart. Used only if the
+              schedule is completed.
+            - save_gif: Saves a GIF of the Gantt chart. Used only if the
+              schedule is completed.
+        render_config:
+            Configuration for rendering. See
+            :class:`~job_shop_lib.reinforcement_learning.RenderConfig`.
+        feature_observer_configs:
+            List of :class:`~job_shop_lib.dispatching.DispatcherObserverConfig`
+            for feature observers.
+        reward_function_config:
+            Configuration for the reward function. See
+            :class:`~job_shop_lib.dispatching.DispatcherObserverConfig` and
+            :class:`~job_shop_lib.reinforcement_learning.RewardObserver`.
+        graph_updater_config:
+            Configuration for the graph updater. The graph updater is used to
+            update the graph representation after each action. See
+            :class:`~job_shop_lib.dispatching.DispatcherObserverConfig` and
+            :class:`~job_shop_lib.graphs.graph_updaters.GraphUpdater`.
+    """
+
+    def __init__(
+        self,
+        instance_generator: InstanceGenerator,
+        feature_observer_configs: Sequence[FeatureObserverConfig],
+        graph_initializer: Callable[
+            [JobShopInstance], JobShopGraph
+        ] = build_agent_task_graph,
+        graph_updater_config: DispatcherObserverConfig[
+            type[GraphUpdater]
+        ] = DispatcherObserverConfig(class_type=ResidualGraphUpdater),
+        ready_operations_filter: Callable[
+            [Dispatcher, list[Operation]], list[Operation]
+        ] = filter_dominated_operations,
+        reward_function_config: DispatcherObserverConfig[
+            type[RewardObserver]
+        ] = DispatcherObserverConfig(class_type=MakespanReward),
+        render_mode: str | None = None,
+        render_config: RenderConfig | None = None,
+        use_padding: bool = True,
+    ) -> None:
+        """Initializes the environment.
+
+        Args:
+            instance_generator:
+                A :class:`~job_shop_lib.generation.InstanceGenerator` that
+                generates a new problem instance on each reset.
+            feature_observer_configs:
+                Configurations for feature observers. Each configuration
+                should be a
+                :class:`~job_shop_lib.dispatching.DispatcherObserverConfig`
+                with a class type that inherits from
+                :class:`~job_shop_lib.dispatching.feature_observers.FeatureObserver`
+                or a string or enum that represents a built-in feature
+                observer.
+            graph_initializer:
+                Function to create the initial graph representation.
+                If ``None``, the default graph initializer is used:
+                :func:`~job_shop_lib.graphs.build_agent_task_graph`.
+            graph_updater_config:
+                Configuration for the graph updater. The graph updater is used
+                to update the graph representation after each action. If
+                ``None``, the default graph updater is used:
+                :class:`~job_shop_lib.graphs.graph_updaters.ResidualGraphUpdater`.
+            ready_operations_filter:
+                Function to filter ready operations. If ``None``, the default
+                filter is used:
+                :func:`~job_shop_lib.dispatching.filter_dominated_operations`.
+            reward_function_config:
+                Configuration for the reward function. If ``None``, the
+                default reward function is used:
+                :class:`~job_shop_lib.reinforcement_learning.MakespanReward`.
+            render_mode:
+                Rendering mode for visualization. Supported modes are:
+                - human: Renders the current Gantt chart.
+                - save_video: Saves a video of the Gantt chart. Used only if
+                  the schedule is completed.
+                - save_gif: Saves a GIF of the Gantt chart. Used only if the
+                  schedule is completed.
+            render_config:
+                Configuration for rendering. See
+                :class:`~job_shop_lib.reinforcement_learning.RenderConfig`.
+            use_padding:
+                Whether to use padding in observations. If ``True``, all
+                matrices are padded to fixed sizes based on the maximum
+                instance size. Values are padded with -1, except for the
+                "removed_nodes" key, which is padded with ``True``,
+                indicating that the node is removed.
+        """
+        super().__init__()
+
+        # Create an instance with the maximum size
+        instance_with_max_size = instance_generator.generate(
+            num_jobs=instance_generator.max_num_jobs,
+            num_machines=instance_generator.max_num_machines,
+        )
+        graph = graph_initializer(instance_with_max_size)
+
+        self.single_job_shop_graph_env = SingleJobShopGraphEnv(
+            job_shop_graph=graph,
+            feature_observer_configs=feature_observer_configs,
+            reward_function_config=reward_function_config,
+            graph_updater_config=graph_updater_config,
+            ready_operations_filter=ready_operations_filter,
+            render_mode=render_mode,
+            render_config=render_config,
+            use_padding=use_padding,
+        )
+        self.instance_generator = instance_generator
+        self.graph_initializer = graph_initializer
+        self.render_mode = render_mode
+        self.render_config = render_config
+        self.feature_observer_configs = feature_observer_configs
+        self.reward_function_config = reward_function_config
+        self.graph_updater_config = graph_updater_config
+
+        self.action_space = deepcopy(
+            self.single_job_shop_graph_env.action_space
+        )
+        self.observation_space: gym.spaces.Dict = deepcopy(
+            self.single_job_shop_graph_env.observation_space
+        )
+
+    @property
+    def dispatcher(self) -> Dispatcher:
+        """Returns the current dispatcher instance."""
+        return self.single_job_shop_graph_env.dispatcher
+
+    @property
+    def reward_function(self) -> RewardObserver:
+        """Returns the current reward function instance."""
+        return self.single_job_shop_graph_env.reward_function
+
+    @reward_function.setter
+    def reward_function(self, reward_function: RewardObserver) -> None:
+        """Sets the reward function instance."""
+        self.single_job_shop_graph_env.reward_function = reward_function
+
+    @property
+    def ready_operations_filter(
+        self,
+    ) -> Callable[[Dispatcher, list[Operation]], list[Operation]] | None:
+        """Returns the current ready operations filter."""
+        return (
+            self.single_job_shop_graph_env.dispatcher.ready_operations_filter
+        )
+
+    @ready_operations_filter.setter
+    def ready_operations_filter(
+        self,
+        pruning_function: Callable[
+            [Dispatcher, list[Operation]], list[Operation]
+        ],
+    ) -> None:
+        """Sets the ready operations filter."""
+        self.single_job_shop_graph_env.dispatcher.ready_operations_filter = (
+            pruning_function
+        )
+
+    @property
+    def use_padding(self) -> bool:
+        """Returns whether the padding is used."""
+        return self.single_job_shop_graph_env.use_padding
+
+    @use_padding.setter
+    def use_padding(self, use_padding: bool) -> None:
+        """Sets whether the padding is used."""
+        self.single_job_shop_graph_env.use_padding = use_padding
+
+    @property
+    def job_shop_graph(self) -> JobShopGraph:
+        """Returns the current job shop graph."""
+        return self.single_job_shop_graph_env.job_shop_graph
+
+    @property
+    def instance(self) -> JobShopInstance:
+        """Returns the current job shop instance."""
+        return self.single_job_shop_graph_env.instance
+
+    def reset(
+        self,
+        *,
+        seed: int | None = None,
+        options: dict[str, Any] | None = None,
+    ) -> tuple[ObservationDict, dict]:
+        """Resets the environment and returns the initial observation.
+
+        Args:
+            seed: Random seed for reproducibility.
+            options: Additional options for reset (currently unused).
+
+        Returns:
+            A tuple containing:
+            - ObservationDict: The initial observation of the environment.
+            - dict: An info dictionary containing additional information about
+              the reset state. This may include details about the generated
+              instance or initial graph structure.
+        """
+        instance = self.instance_generator.generate()
+        graph = self.graph_initializer(instance)
+        self.single_job_shop_graph_env = SingleJobShopGraphEnv(
+            job_shop_graph=graph,
+            feature_observer_configs=self.feature_observer_configs,
+            reward_function_config=self.reward_function_config,
+            ready_operations_filter=self.ready_operations_filter,
+            render_mode=self.render_mode,
+            render_config=self.render_config,
+            use_padding=self.single_job_shop_graph_env.use_padding,
+        )
+        obs, info = self.single_job_shop_graph_env.reset(
+            seed=seed, options=options
+        )
+        if self.use_padding:
+            obs = self._add_padding_to_observation(obs)
+
+        return obs, info
+
+    def step(
+        self, action: tuple[int, int]
+    ) -> tuple[ObservationDict, float, bool, bool, dict]:
+        """Takes a step in the environment.
+
+        Args:
+            action:
+                The action to take. The action is a tuple of two integers
+                (job_id, machine_id): the job ID and the machine ID in which
+                to schedule the operation.
+
+        Returns:
+            A tuple containing the following elements:
+            - The observation of the environment.
+            - The reward obtained.
+            - Whether the environment is done.
+            - Whether the episode was truncated (always False).
+            - A dictionary with additional information. The dictionary
+              contains the following keys:
+                - "feature_names": The names of the features in the
+                  observation.
+                - "available_operations": The operations that are ready to be
+                  scheduled.
+        """
+        obs, reward, done, truncated, info = (
+            self.single_job_shop_graph_env.step(action)
+        )
+        if self.use_padding:
+            obs = self._add_padding_to_observation(obs)
+
+        return obs, reward, done, truncated, info
+
+    def _add_padding_to_observation(
+        self, observation: ObservationDict
+    ) -> ObservationDict:
+        """Adds padding to the observation.
+
+        "removed_nodes":
+            input_shape: (num_nodes,)
+            output_shape: (max_num_nodes,) (padded with True)
+        "edge_index":
+            input_shape: (2, num_edges)
+            output_shape: (2, max_num_edges) (padded with -1)
+        "operations":
+            input_shape: (num_operations, num_features)
+            output_shape: (max_num_operations, num_features) (padded with -1)
+        "jobs":
+            input_shape: (num_jobs, num_features)
+            output_shape: (max_num_jobs, num_features) (padded with -1)
+        "machines":
+            input_shape: (num_machines, num_features)
+            output_shape: (max_num_machines, num_features) (padded with -1)
+        """
+        padding_value: dict[str, float | bool] = defaultdict(lambda: -1)
+        padding_value[ObservationSpaceKey.REMOVED_NODES.value] = True
+        for key, value in observation.items():
+            if not isinstance(value, np.ndarray):  # Make mypy happy
+                continue
+            expected_shape = self._get_output_shape(key)
+            observation[key] = add_padding(  # type: ignore[literal-required]
+                value,
+                expected_shape,
+                padding_value=padding_value[key],
+            )
+        return observation
+
+    def _get_output_shape(self, key: str) -> tuple[int, ...]:
+        """Returns the output shape of the observation space key."""
+        output_shape = self.observation_space[key].shape
+        assert output_shape is not None  # Make mypy happy
+        return output_shape
+
+    def render(self) -> None:
+        self.single_job_shop_graph_env.render()
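To make the control flow above concrete, here is a minimal usage sketch (not part of the diff). The generator parameters and the string-based feature observer names are assumptions inferred from the generation and feature_observers modules in the file list; the policy below simply schedules the first available operation.

    # Illustrative usage sketch; names marked "assumed" are not verified
    # against the wheel.
    from job_shop_lib.generation import GeneralInstanceGenerator  # assumed
    from job_shop_lib.reinforcement_learning import MultiJobShopGraphEnv

    # Instances are sampled with 3-5 jobs and 3-4 machines on each reset.
    generator = GeneralInstanceGenerator(
        num_jobs=(3, 5), num_machines=(3, 4), seed=42  # assumed parameters
    )
    env = MultiJobShopGraphEnv(
        instance_generator=generator,
        # Strings are resolved to built-in observers (assumed names).
        feature_observer_configs=["duration", "is_ready"],
        use_padding=True,  # pad matrices to the maximum instance size
    )

    obs, info = env.reset(seed=42)
    done = False
    total_reward = 0.0
    while not done:
        # Assumes the info dict from reset exposes "available_operations"
        # just as the step docstring documents for step.
        operation = info["available_operations"][0]
        action = (operation.job_id, operation.machine_id)
        obs, reward, done, _truncated, info = env.step(action)
        total_reward += reward
    # With the default MakespanReward, the per-step rewards telescope, so
    # total_reward equals the negative final makespan.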
job_shop_lib/reinforcement_learning/_reward_observers.py
@@ -0,0 +1,85 @@
+"""Reward functions are defined as `DispatcherObserver`s and are used to
+calculate the reward for a given state."""
+
+from job_shop_lib.dispatching import DispatcherObserver, Dispatcher
+from job_shop_lib import ScheduledOperation
+
+
+class RewardObserver(DispatcherObserver):
+    """Base class for all reward functions.
+
+    Attributes:
+        rewards:
+            List of rewards calculated for each operation scheduled by the
+            dispatcher.
+    """
+
+    def __init__(
+        self, dispatcher: Dispatcher, *, subscribe: bool = True
+    ) -> None:
+        super().__init__(dispatcher, subscribe=subscribe)
+        self.rewards: list[float] = []
+
+    @property
+    def last_reward(self) -> float:
+        """Returns the reward of the last step, or 0 if no rewards have been
+        calculated."""
+        return self.rewards[-1] if self.rewards else 0
+
+    def reset(self) -> None:
+        """Sets the rewards attribute to a new empty list."""
+        self.rewards = []
+
+
+class MakespanReward(RewardObserver):
+    """Dense reward function based on the negative makespan of the schedule.
+
+    The reward is calculated as the difference between the makespan of the
+    schedule before and after the last operation was scheduled. The makespan
+    is the time at which the last operation is completed.
+
+    Attributes:
+        current_makespan:
+            Makespan of the schedule after the last operation was scheduled.
+    """
+
+    def __init__(
+        self, dispatcher: Dispatcher, *, subscribe: bool = True
+    ) -> None:
+        super().__init__(dispatcher, subscribe=subscribe)
+        self.current_makespan = dispatcher.schedule.makespan()
+
+    def reset(self) -> None:
+        super().reset()
+        self.current_makespan = self.dispatcher.schedule.makespan()
+
+    def update(self, scheduled_operation: ScheduledOperation):
+        last_makespan = self.current_makespan
+        self.current_makespan = max(
+            last_makespan, scheduled_operation.end_time
+        )
+        reward = last_makespan - self.current_makespan
+        self.rewards.append(reward)
+
+
+class IdleTimeReward(RewardObserver):
+    """Dense reward function based on the negative idle time of the schedule.
+
+    The reward is the negative idle time incurred by the last scheduled
+    operation: the gap between the end of the previous operation on the same
+    machine (or time zero if there is none) and the start of the new one.
+    """
+
+    def update(self, scheduled_operation: ScheduledOperation):
+        machine_id = scheduled_operation.machine_id
+        machine_schedule = self.dispatcher.schedule.schedule[machine_id][:-1]
+
+        if machine_schedule:
+            last_operation = machine_schedule[-1]
+            idle_time = (
+                scheduled_operation.start_time - last_operation.end_time
+            )
+        else:
+            idle_time = scheduled_operation.start_time
+
+        reward = -idle_time
+        self.rewards.append(reward)
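Because RewardObserver only requires an update method, a custom reward is a small subclass. Below is a minimal sketch of a sparse variant (not part of the diff); Schedule.is_complete and load_benchmark_instance are assumed names, inferred from the _schedule.py and benchmarking/_load_benchmark.py entries in the file list.

    # Hedged sketch of a custom reward observer (illustrative only).
    from job_shop_lib import ScheduledOperation
    from job_shop_lib.benchmarking import load_benchmark_instance  # assumed
    from job_shop_lib.dispatching import Dispatcher
    from job_shop_lib.reinforcement_learning import RewardObserver


    class SparseMakespanReward(RewardObserver):
        """Emits 0 until the schedule completes, then the negative makespan.

        Mirrors the dense MakespanReward above, but defers the whole signal
        to the final step.
        """

        def update(self, scheduled_operation: ScheduledOperation) -> None:
            # Schedule.is_complete() is assumed to report whether every
            # operation has been scheduled.
            if self.dispatcher.schedule.is_complete():
                self.rewards.append(-self.dispatcher.schedule.makespan())
            else:
                self.rewards.append(0)


    instance = load_benchmark_instance("ft06")
    dispatcher = Dispatcher(instance)
    reward_observer = SparseMakespanReward(dispatcher)  # subscribes on init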