server-simulator 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- server_simulator-0.1.0.dist-info/METADATA +115 -0
- server_simulator-0.1.0.dist-info/RECORD +49 -0
- server_simulator-0.1.0.dist-info/WHEEL +4 -0
- server_simulator-0.1.0.dist-info/licenses/LICENSE +21 -0
- src/__init__.py +0 -0
- src/envs/__init__.py +75 -0
- src/envs/cluster_simulator/__init__.py +0 -0
- src/envs/cluster_simulator/actions.py +55 -0
- src/envs/cluster_simulator/base/__init__.py +0 -0
- src/envs/cluster_simulator/base/extractors/__init__.py +0 -0
- src/envs/cluster_simulator/base/extractors/information.py +26 -0
- src/envs/cluster_simulator/base/extractors/observation.py +41 -0
- src/envs/cluster_simulator/base/extractors/reward.py +29 -0
- src/envs/cluster_simulator/base/internal/cluster.py +137 -0
- src/envs/cluster_simulator/base/internal/dilation.py +189 -0
- src/envs/cluster_simulator/base/internal/job.py +63 -0
- src/envs/cluster_simulator/base/internal/machine.py +33 -0
- src/envs/cluster_simulator/base/renderer.py +34 -0
- src/envs/cluster_simulator/basic.py +74 -0
- src/envs/cluster_simulator/deep_rm/__init__.py +164 -0
- src/envs/cluster_simulator/deep_rm/creator.py +48 -0
- src/envs/cluster_simulator/deep_rm/internal/custom_type.py +21 -0
- src/envs/cluster_simulator/deep_rm/internal/jobs.py +77 -0
- src/envs/cluster_simulator/deep_rm/internal/machines.py +56 -0
- src/envs/cluster_simulator/deep_rm/observation.py +79 -0
- src/envs/cluster_simulator/metric_based/__init__.py +155 -0
- src/envs/cluster_simulator/metric_based/creator.py +52 -0
- src/envs/cluster_simulator/metric_based/internal/custom_type.py +20 -0
- src/envs/cluster_simulator/metric_based/internal/dilation.py +83 -0
- src/envs/cluster_simulator/metric_based/internal/jobs.py +80 -0
- src/envs/cluster_simulator/metric_based/internal/machines.py +56 -0
- src/envs/cluster_simulator/metric_based/observation.py +80 -0
- src/envs/cluster_simulator/metric_based/renderer.py +541 -0
- src/envs/cluster_simulator/single_slot/__init__.py +84 -0
- src/envs/cluster_simulator/single_slot/creator.py +47 -0
- src/envs/cluster_simulator/single_slot/internal/jobs.py +50 -0
- src/envs/cluster_simulator/single_slot/internal/machines.py +49 -0
- src/envs/cluster_simulator/single_slot/observation.py +68 -0
- src/envs/cluster_simulator/utils/__init__.py +0 -0
- src/envs/cluster_simulator/utils/array_operations.py +197 -0
- src/scheduler/__init__.py +7 -0
- src/scheduler/base_scheduler.py +38 -0
- src/scheduler/first_come_first_served_scheduler.py +36 -0
- src/scheduler/random_scheduler.py +41 -0
- src/scheduler/round_robin_scheduler.py +51 -0
- src/scheduler/shortest_job_first_scheduler.py +47 -0
- src/wrappers/cluster_simulator/__init__.py +0 -0
- src/wrappers/cluster_simulator/dilation_wrapper.py +150 -0
- src/wrappers/cluster_simulator/render_wrapper.py +60 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: server-simulator
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Add your description here
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: gymnasium>=1.2.1
|
|
8
|
+
Requires-Dist: numpy>=2.2.6
|
|
9
|
+
Requires-Dist: pygame>=2.6.1
|
|
10
|
+
Requires-Dist: rust-enum>=1.1.5
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+

|
|
14
|
+
[](https://codecov.io/gh/dev0Guy/server-simulator)
|
|
15
|
+

|
|
16
|
+

|
|
17
|
+
[](https://www.python.org/dev/peps/pep-0008/)
|
|
18
|
+
|
|
19
|
+
Base Status
|
|
20
|
+
-----------
|
|
21
|
+
We've created 3 Environments for scheduling, which implement `ClusterABC` with the `JobCollection` and `MachineCollection` protocols:
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
### SingleSlot Environment
|
|
25
|
+
|
|
26
|
+
| Component | Details |
|
|
27
|
+
|----------|---------|
|
|
28
|
+
| **Description** | Minimal scheduling environment with no temporal memory; each machine and job exposes only its current resource usage. |
|
|
29
|
+
| **State: Machines** | Shape: **[resource_usage]** — a single-cell array indicating current resource utilization (value between 0–1). |
|
|
30
|
+
| **State: Jobs** | Shape: **[resource_usage]** — a single-cell array indicating the current resource requirement (value between 0–1). |
|
|
31
|
+
| **Actions** | Discrete integer: **0 … (num_machines × num_jobs) + 1**. |
|
|
32
|
+
| **Action 0** | Interrupt / time tick (move to next timestamp). |
|
|
33
|
+
| **Action 1+** | Scheduling decision mapping to a specific **(machine, job)** pair. |
|
|
34
|
+
|
|
35
|
+
### DeepRM Environment
|
|
36
|
+
|
|
37
|
+
| Component | Details |
|
|
38
|
+
|----------|---------|
|
|
39
|
+
| **Description** | Environment inspired by the DeepRM scheduling model, representing fine-grained resource units across time. |
|
|
40
|
+
| **State: Machines** | Tensor shape: **[num_machines, num_resources, num_resource_cells, num_ticks]**. Each cell represents whether resource unit **r_unit** of resource **r** is occupied at time **t**. |
|
|
41
|
+
| **State: Jobs** | Tensor shape: **[num_jobs, num_resources, num_resource_cells, num_ticks]**. Each cell represents whether the job requires resource unit **r_unit** of resource **r** at time **t**. |
|
|
42
|
+
| **Actions** | Discrete integer: **0 … (num_machines × num_jobs) + 1**. |
|
|
43
|
+
| **Action 0** | Interrupt / time tick (advance to next timestamp). |
|
|
44
|
+
| **Action 1+** | Scheduling decision mapping to a specific **(machine, job)** pair. |
|
|
45
|
+
|
|
46
|
+
### MetricBased Environment
|
|
47
|
+
|
|
48
|
+
| Component | Details |
|
|
49
|
+
|----------|---------|
|
|
50
|
+
| **Description** | Scheduling environment that models resource usage of machines and jobs over time. |
|
|
51
|
+
| **State: Machines** | Tensor shape: **[num_machines, n_resources, n_ticks]**. Each value ∈ **[0–1]** representing machine resource usage for resource **r** at time **t**. |
|
|
52
|
+
| **State: Jobs** | Tensor shape: **[num_jobs, n_resources, n_ticks]**. Each value ∈ **[0–1]** representing job resource demand for resource **r** at time **t**. |
|
|
53
|
+
| **Actions** | Discrete integer: **0 … (num_machines × num_jobs) + 1**. |
|
|
54
|
+
| **Action 0** | Represents an interrupt or a time tick (advance to next timestamp). |
|
|
55
|
+
| **Action 1+** | Represents assigning job **j** to machine **m** (encoded according to environment mapping). |
|
|
56
|
+
|
|
57
|
+
<br>
|
|
58
|
+
|
|
59
|
+
**Dilation Operations:** assuming the kernel size (in each dimension) is bigger than 1. By padding the input according to the maximum possible zoom-in, the service can work with varying kernel sizes.
|
|
60
|
+
In addition, assume that the user (in our case the DRL agent) can execute 3 operations: zooming in (going up one level, -1), zooming out (going down one level, +1), or skipping to the next timestamp;
|
|
61
|
+
notice that without executing real scheduling (i.e., being at the last level and selecting a machine) the agent can't stop and skip.
|
|
62
|
+
Our algorithm represents the state as an ndarray reshaped from `shape` to `[kernel_x, kernel_y, *shape[1:]]`. <br>
|
|
63
|
+
|
|
64
|
+
**Reward Functions:** for now the only reward function is +1 for scheduling a job and changing the job status from pending to running.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
Change Log
|
|
68
|
+
-----
|
|
69
|
+
EMPTY
|
|
70
|
+
|
|
71
|
+
Tasks:
|
|
72
|
+
-----
|
|
73
|
+
|
|
74
|
+
#### Important:
|
|
75
|
+
|
|
76
|
+
- [-] Create Dilation Gym Environment Wrapper `DilationWrapper`.
|
|
77
|
+
- [ ] Test `DilationWrapper`.
|
|
78
|
+
- [ ] Implement rendering techniques to represent and visualize cluster results
|
|
79
|
+
- [ ] Implement different reward Wrapper:
|
|
80
|
+
- [ ] Need to decide which one ??
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
#### Side Note:
|
|
84
|
+
|
|
85
|
+
- [ ] Create Dilation for DeepRM
|
|
86
|
+
- [ ] Create Tests for DeepRM
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
Finished Tasks:
|
|
90
|
+
-----
|
|
91
|
+
- [X] Implement global machine Protocol (`Machine`,`MachineCollection`).
|
|
92
|
+
- [X] Implement global job Protocol (`Job`,`JobCollection`).
|
|
93
|
+
- [X] Implement abstract cluster using these protocols, for Cross-functional (`ClusterABC`), where each subclass implement the creation of jobs and machines.
|
|
94
|
+
- [X] Test `ClusterABC` (property based).
|
|
95
|
+
- [X] Implement Single slot cluster using the abstract class (`SingleSlotCluster`).
|
|
96
|
+
- [X] Test `SingleSlotCluster` (property based).
|
|
97
|
+
- [X] Implement DeepRM cluster using the abstract class (`DeepRMCluster`).
|
|
98
|
+
- [X] Test `DeepRMCluster` (property based).
|
|
99
|
+
- [X] Implement MetricBased cluster using the abstract class (`MetricCluster`).
|
|
100
|
+
- [X] Test `MetricCluster` (property based).
|
|
101
|
+
- [X] Implement Gym Environment that get cluster as dependency `BasicClusterEnv`.
|
|
102
|
+
- [X] Test `BasicClusterEnv` (property based) using random scheduler (`RandomScheduler`).
|
|
103
|
+
- [X] Implement Dilation Protocol `DilationProtocol`.
|
|
104
|
+
- [X] Implement Dilation numpy functionality (`hierarchical_pooling`, `get_window_from_cell`, etc..) `array_operation.py`.
|
|
105
|
+
- [X] Test Dilation numpy functionality.
|
|
106
|
+
- [X] Implement Dilation Service for Metric based cluster `MetricBasedDilator`.
|
|
107
|
+
- [X] Test Dilation `MetricBasedDilator`.
|
|
108
|
+
|
|
109
|
+
## Assumption
|
|
110
|
+
- Dilation assumes that the cluster state is bigger than the dilation window & the kernel has no 1 in any of its dimensions
|
|
111
|
+
- For each step that is not a real allocation, the reward is set to 0
|
|
112
|
+
- Each job yields a reward of 1 if its status changes to running
|
|
113
|
+
- Dilation operates by taking [n_machine, n_resource, n_ticks] and
|
|
114
|
+
padding it to produce a size of [max_x_kernel, max_y_kernel, n_resources, n_ticks]
|
|
115
|
+
- Dilation implements both zoom in and zoom out; arriving at level 0 will cause real scheduling
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
src/envs/__init__.py,sha256=cTNZm1WxT_V7ZMq2llkY8QMSoW6kFVS4k6YOSZ6W6Mo,1909
|
|
3
|
+
src/envs/cluster_simulator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
src/envs/cluster_simulator/actions.py,sha256=a89vmxnhPym7HND2JUAtoxQb3GqJ3z-ejgoAO-ZtInc,1615
|
|
5
|
+
src/envs/cluster_simulator/basic.py,sha256=2ZgNEoWg7a-yc2VlPsX2n4fTdBq1L7GPWsJZUJXtp6k,2831
|
|
6
|
+
src/envs/cluster_simulator/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
src/envs/cluster_simulator/base/renderer.py,sha256=rY81B6390BTS3AHoWWZsC1_Hs0zMUegjKFdkxC0E3AI,1016
|
|
8
|
+
src/envs/cluster_simulator/base/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
src/envs/cluster_simulator/base/extractors/information.py,sha256=MEtlsiakBnSVsaFzWI3WqcIGA6sY1aUS9xWcF09E5x0,807
|
|
10
|
+
src/envs/cluster_simulator/base/extractors/observation.py,sha256=JFfMaS-n6XAFyV51RVCVIgbssC7EUID4rdta2WQ4Gjw,1580
|
|
11
|
+
src/envs/cluster_simulator/base/extractors/reward.py,sha256=fL_qVtTV7A2xk2UHgcjQUzRpHmUwXC23DBCOD2RvJzs,1011
|
|
12
|
+
src/envs/cluster_simulator/base/internal/cluster.py,sha256=Da2vkCSR6RyBj3_eAKcg93eQt5DCtPmgdBAVAhH8IlM,4636
|
|
13
|
+
src/envs/cluster_simulator/base/internal/dilation.py,sha256=350iSUZpHHq7t4AAJ9qLScCtUwYGI-RFC-iUgHQ6ZjA,6598
|
|
14
|
+
src/envs/cluster_simulator/base/internal/job.py,sha256=WkN8zWnRlcKaX1xh_63IDEbW8djKrCNpq8vmcXauAk8,1644
|
|
15
|
+
src/envs/cluster_simulator/base/internal/machine.py,sha256=tzE-RFetCNP1OQU5XU46VU3biiuW_nqYwBVpam2E8xM,796
|
|
16
|
+
src/envs/cluster_simulator/deep_rm/__init__.py,sha256=UD9oY-Lkw0i9dn21A1A6IZFhxIUJ8Y8gLBjEcHIM2XQ,5495
|
|
17
|
+
src/envs/cluster_simulator/deep_rm/creator.py,sha256=jwBCaEWMLazyeYI5WU5bXKbXHVqvCID-WA74ChGZv5g,1725
|
|
18
|
+
src/envs/cluster_simulator/deep_rm/observation.py,sha256=uahjW4_sq5u7U0gZNglsjGCm727HAGi2Rm0aGpNuNr4,2741
|
|
19
|
+
src/envs/cluster_simulator/deep_rm/internal/custom_type.py,sha256=kJjdmO2JQnxTWJks0ydPW2LOGqClF8AyL2JTorHK720,808
|
|
20
|
+
src/envs/cluster_simulator/deep_rm/internal/jobs.py,sha256=bvXwh6I5nt3CBTc_QlESLZQPTt_PMUJphvDrBSExcxQ,2568
|
|
21
|
+
src/envs/cluster_simulator/deep_rm/internal/machines.py,sha256=QSsijFY2VQSQG5kzAjwv86Ix5vq42F_e3fbAxWWC5y8,1848
|
|
22
|
+
src/envs/cluster_simulator/metric_based/__init__.py,sha256=ReKV6znBOlAj8_74uyPhjtzTmOap2jjitqsPwf4-w6w,5619
|
|
23
|
+
src/envs/cluster_simulator/metric_based/creator.py,sha256=DsqXbEztbfNeA94kYKirh-Z-sbk0CvBH_y68xaZofKw,1791
|
|
24
|
+
src/envs/cluster_simulator/metric_based/observation.py,sha256=zMT65hfrj8a8V1cmWD3h3f6RptzueDiWfwW_Ez-0fhE,2869
|
|
25
|
+
src/envs/cluster_simulator/metric_based/renderer.py,sha256=aQVIVPKWDZmFM2YrgW6PuSND9LA_eEg9qno_XGoMIG8,18567
|
|
26
|
+
src/envs/cluster_simulator/metric_based/internal/custom_type.py,sha256=RpjLnZiBvuooYuEfOeZoH1Z0qE7B1r8LwwC-ZpyVjQc,719
|
|
27
|
+
src/envs/cluster_simulator/metric_based/internal/dilation.py,sha256=l5E1aNkRjpoSFTpbzM_e7IdPo-L2J0e-Qg8ju0WmgfA,2692
|
|
28
|
+
src/envs/cluster_simulator/metric_based/internal/jobs.py,sha256=AoYMEsoXbXfy0JAP6Ln6FjlxOk9iEgfhmEVLS1Wq6Zo,2579
|
|
29
|
+
src/envs/cluster_simulator/metric_based/internal/machines.py,sha256=NvtU109MUwMsO88cSaP42uCwzIAMovyJvCZYgsBaaqo,1825
|
|
30
|
+
src/envs/cluster_simulator/single_slot/__init__.py,sha256=wcgpd8nDvD6Go4K-B1gUXd103l408qKFTr5wAcnSnR4,2918
|
|
31
|
+
src/envs/cluster_simulator/single_slot/creator.py,sha256=9gZSTM40FonjABr5jDYSBFPc1j7JK9Sf9C403HKsghM,1551
|
|
32
|
+
src/envs/cluster_simulator/single_slot/observation.py,sha256=WGAq6WXy_m81gIDZT4Nj1Eo0p1cfxRqLqdewhkP6sp4,2475
|
|
33
|
+
src/envs/cluster_simulator/single_slot/internal/jobs.py,sha256=O5synsGDznVZsTiGYQJqxkH2NcLAuo7IpFUQ7D32QJk,1385
|
|
34
|
+
src/envs/cluster_simulator/single_slot/internal/machines.py,sha256=OwDwKXYRJcxAPvUIy9qRM-NGzQpVCk4_Uvi9YqAbIBg,1404
|
|
35
|
+
src/envs/cluster_simulator/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
+
src/envs/cluster_simulator/utils/array_operations.py,sha256=ozytluo1s6mX56yRngvvVcbto0D6XY43yAPn9F3lh5I,5553
|
|
37
|
+
src/scheduler/__init__.py,sha256=nLNJeGpkX0f99MZv7U-9pmO2ABru1crlsAvi8HJYtPM,297
|
|
38
|
+
src/scheduler/base_scheduler.py,sha256=gMjSEegVuQoNi_nQB9Bb3p1R2sfQvC6y-M7jWF3mMhM,1192
|
|
39
|
+
src/scheduler/first_come_first_served_scheduler.py,sha256=13hfdjgHnGRnN3bVUiw5Rwo8ANcFBR4aBAOSY2Dkm98,1134
|
|
40
|
+
src/scheduler/random_scheduler.py,sha256=vdDVmGB_r_9BcKibMe-Hpz-T4VBd2ckx3AYxvp7PjfU,1358
|
|
41
|
+
src/scheduler/round_robin_scheduler.py,sha256=CSaknCXC6E6d7dqMCXN8Gn6PzlUBzodrgcIMEnMQ_kc,1755
|
|
42
|
+
src/scheduler/shortest_job_first_scheduler.py,sha256=6JQ7goW5TXOclZKnFADZlOLJbB7o3oN0lXvJWFGdDgk,1542
|
|
43
|
+
src/wrappers/cluster_simulator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
+
src/wrappers/cluster_simulator/dilation_wrapper.py,sha256=559T_h5lVsbCsRjX56F5ydyE4ra3-QMHAtriAE9JRSU,5765
|
|
45
|
+
src/wrappers/cluster_simulator/render_wrapper.py,sha256=VeM70jR6SJ4nMyrLjU72754YgVmhd2l_VCYecOVQmFE,2288
|
|
46
|
+
server_simulator-0.1.0.dist-info/METADATA,sha256=jFYmB2wwPoKls7MrG_whatfrNNbM6loLnQfi_hCz52s,6212
|
|
47
|
+
server_simulator-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
48
|
+
server_simulator-0.1.0.dist-info/licenses/LICENSE,sha256=PCKzzTExyQOa6tToNr00723ehmJu4YuZG8E5p4Z-0m8,1064
|
|
49
|
+
server_simulator-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dev0Guy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
src/__init__.py
ADDED
|
File without changes
|
src/envs/__init__.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from src.envs.cluster_simulator.base.extractors.reward import (
|
|
2
|
+
DifferentInPendingJobsRewardCaculator,
|
|
3
|
+
)
|
|
4
|
+
from src.envs.cluster_simulator.basic import BasicClusterEnv as BasicClusterEnv
|
|
5
|
+
from gymnasium import register
|
|
6
|
+
|
|
7
|
+
from src.envs.cluster_simulator.deep_rm.creator import (
|
|
8
|
+
DeepRMEnvCreator,
|
|
9
|
+
DeepRMCreatorParameters,
|
|
10
|
+
)
|
|
11
|
+
from src.envs.cluster_simulator.metric_based.creator import (
|
|
12
|
+
MetricBasedEnvCreator,
|
|
13
|
+
MetricBasedCreatorParameters,
|
|
14
|
+
)
|
|
15
|
+
from src.envs.cluster_simulator.single_slot.creator import (
|
|
16
|
+
SingleSlotEnvCreator,
|
|
17
|
+
SingleSlotCreatorParameters,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
register(
|
|
21
|
+
"ClusterScheduling-single-slot-v1",
|
|
22
|
+
SingleSlotEnvCreator(),
|
|
23
|
+
kwargs=SingleSlotCreatorParameters( # type: ignore
|
|
24
|
+
n_jobs=10,
|
|
25
|
+
n_machines=2,
|
|
26
|
+
reward_caculator=DifferentInPendingJobsRewardCaculator(),
|
|
27
|
+
seed=None,
|
|
28
|
+
),
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
register(
|
|
33
|
+
"ClusterScheduling-deeprm-v1",
|
|
34
|
+
DeepRMEnvCreator(),
|
|
35
|
+
kwargs=DeepRMCreatorParameters( # type: ignore
|
|
36
|
+
n_jobs=10,
|
|
37
|
+
n_machines=2,
|
|
38
|
+
n_resources=3,
|
|
39
|
+
n_resources_unit=5,
|
|
40
|
+
n_ticks=5,
|
|
41
|
+
reward_caculator=DifferentInPendingJobsRewardCaculator(),
|
|
42
|
+
seed=None,
|
|
43
|
+
),
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
register(
|
|
48
|
+
"ClusterScheduling-metric-offline-v1",
|
|
49
|
+
MetricBasedEnvCreator(),
|
|
50
|
+
kwargs=MetricBasedCreatorParameters( # type: ignore
|
|
51
|
+
n_jobs=10,
|
|
52
|
+
n_machines=2,
|
|
53
|
+
n_resources=3,
|
|
54
|
+
n_ticks=5,
|
|
55
|
+
poisson_lambda=4,
|
|
56
|
+
offline=True,
|
|
57
|
+
reward_caculator=DifferentInPendingJobsRewardCaculator(),
|
|
58
|
+
seed=None,
|
|
59
|
+
),
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
register(
|
|
63
|
+
"ClusterScheduling-metric-online-v1",
|
|
64
|
+
MetricBasedEnvCreator(),
|
|
65
|
+
kwargs=MetricBasedCreatorParameters( # type: ignore
|
|
66
|
+
n_jobs=10,
|
|
67
|
+
n_machines=2,
|
|
68
|
+
n_resources=3,
|
|
69
|
+
n_ticks=5,
|
|
70
|
+
poisson_lambda=4,
|
|
71
|
+
offline=False,
|
|
72
|
+
reward_caculator=DifferentInPendingJobsRewardCaculator(),
|
|
73
|
+
seed=None,
|
|
74
|
+
),
|
|
75
|
+
)
|
|
File without changes
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from typing import NamedTuple, Tuple, TypeVar
|
|
2
|
+
import gymnasium as gym
|
|
3
|
+
|
|
4
|
+
from src.envs.cluster_simulator.base.internal.cluster import ClusterAction, ClusterABC
|
|
5
|
+
|
|
6
|
+
Cluster = TypeVar("Cluster", bound=ClusterABC)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class EnvironmentAction(NamedTuple):
|
|
10
|
+
should_schedule: bool
|
|
11
|
+
schedule: Tuple[int, int]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DilationEnvironmentAction(NamedTuple):
|
|
15
|
+
selected_machine_cell: Tuple[int, int]
|
|
16
|
+
selected_job: int
|
|
17
|
+
execute_schedule_command: bool # a.k.a skip time
|
|
18
|
+
contract: bool
|
|
19
|
+
|
|
20
|
+
@classmethod
|
|
21
|
+
def into_action_space(
|
|
22
|
+
cls, kernel_shape: Tuple[int, int], n_jobs: int
|
|
23
|
+
) -> gym.Space[tuple]:
|
|
24
|
+
return gym.spaces.Tuple(
|
|
25
|
+
spaces=(
|
|
26
|
+
gym.spaces.MultiDiscrete(kernel_shape),
|
|
27
|
+
gym.spaces.Discrete(n_jobs),
|
|
28
|
+
gym.spaces.Discrete(2),
|
|
29
|
+
gym.spaces.Discrete(2),
|
|
30
|
+
)
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ActionConvertor:
|
|
35
|
+
@staticmethod
|
|
36
|
+
def convert(original: EnvironmentAction) -> ClusterAction:
|
|
37
|
+
if original.should_schedule:
|
|
38
|
+
return ClusterAction.SkipTime()
|
|
39
|
+
|
|
40
|
+
assert all(idx >= 0 for idx in original.schedule)
|
|
41
|
+
return ClusterAction.Schedule(*original.schedule)
|
|
42
|
+
|
|
43
|
+
@staticmethod
|
|
44
|
+
def create_space(cluster: Cluster) -> gym.Space:
|
|
45
|
+
return gym.spaces.Tuple(
|
|
46
|
+
(
|
|
47
|
+
gym.spaces.Discrete(2),
|
|
48
|
+
gym.spaces.Tuple(
|
|
49
|
+
(
|
|
50
|
+
gym.spaces.Discrete(cluster.n_machines), # type: ignore
|
|
51
|
+
gym.spaces.Discrete(cluster.n_jobs), # type: ignore
|
|
52
|
+
)
|
|
53
|
+
),
|
|
54
|
+
)
|
|
55
|
+
)
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import TypeVar, TypedDict, Generic
|
|
3
|
+
import numpy.typing as npt
|
|
4
|
+
|
|
5
|
+
from src.envs.cluster_simulator.base.extractors.observation import ClusterObservation
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ClusterBaseInformation(TypedDict):
|
|
9
|
+
n_machines: int
|
|
10
|
+
n_jobs: int
|
|
11
|
+
jobs_status: npt.ArrayLike
|
|
12
|
+
current_tick: int
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
ClusterInformation = TypeVar("ClusterInformation", bound=ClusterBaseInformation)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BaceClusterInformationExtractor(Generic[ClusterObservation, ClusterInformation]):
|
|
19
|
+
@abc.abstractmethod
|
|
20
|
+
def __call__(self, obs: ClusterObservation) -> ClusterInformation:
|
|
21
|
+
return ClusterBaseInformation(
|
|
22
|
+
n_machines=obs["machines"].shape[0],
|
|
23
|
+
n_jobs=obs["jobs_usage"].shape[0],
|
|
24
|
+
jobs_status=obs["jobs_status"],
|
|
25
|
+
current_tick=obs["current_tick"],
|
|
26
|
+
)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import Protocol, TypeVar, runtime_checkable, overload, Literal
|
|
3
|
+
import gymnasium as gym
|
|
4
|
+
import numpy.typing as npt
|
|
5
|
+
|
|
6
|
+
from src.envs.cluster_simulator.base.internal.cluster import ClusterABC
|
|
7
|
+
from src.envs.cluster_simulator.base.internal.job import JobCollectionConvertor
|
|
8
|
+
from src.envs.cluster_simulator.base.internal.machine import MachinesCollectionConvertor
|
|
9
|
+
|
|
10
|
+
Cluster = TypeVar("Cluster", bound=ClusterABC)
|
|
11
|
+
MachinesRepresentation = TypeVar("MachinesRepresentation")
|
|
12
|
+
JobsRepresentation = TypeVar("JobsRepresentation")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@runtime_checkable
|
|
16
|
+
class BaseClusterObservation(Protocol[MachinesRepresentation, JobsRepresentation]):
|
|
17
|
+
"""Protocol ensuring dict-like access with specific required keys"""
|
|
18
|
+
|
|
19
|
+
@overload
|
|
20
|
+
def __getitem__(self, key: Literal["machines"]) -> MachinesRepresentation: ...
|
|
21
|
+
@overload
|
|
22
|
+
def __getitem__(self, key: Literal["jobs_usage"]) -> JobsRepresentation: ...
|
|
23
|
+
@overload
|
|
24
|
+
def __getitem__(self, key: Literal["jobs_status"]) -> npt.ArrayLike: ...
|
|
25
|
+
@overload
|
|
26
|
+
def __getitem__(self, key: Literal["current_tick"]) -> npt.ArrayLike: ...
|
|
27
|
+
def __getitem__(self, key: str) -> object: ...
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
ClusterObservation = TypeVar("ClusterObservation", bound=BaseClusterObservation)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class BaseObservationCreatorProtocol(Protocol[Cluster, ClusterObservation]):
|
|
34
|
+
_machines_convertor: MachinesCollectionConvertor
|
|
35
|
+
_jobs_convertor: JobCollectionConvertor
|
|
36
|
+
|
|
37
|
+
@abc.abstractmethod
|
|
38
|
+
def create(self, cluster: Cluster) -> ClusterObservation: ...
|
|
39
|
+
|
|
40
|
+
@abc.abstractmethod
|
|
41
|
+
def create_space(self, cluster: Cluster) -> gym.Space: ...
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from typing import Generic
|
|
2
|
+
import abc
|
|
3
|
+
|
|
4
|
+
from src.envs.cluster_simulator.base.internal.job import Status
|
|
5
|
+
from src.envs.cluster_simulator.base.extractors.information import ClusterInformation
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RewardCaculator(Generic[ClusterInformation]):
|
|
9
|
+
@abc.abstractmethod
|
|
10
|
+
def __call__(
|
|
11
|
+
self,
|
|
12
|
+
prev_extra_information: ClusterInformation,
|
|
13
|
+
current_extra_information: ClusterInformation,
|
|
14
|
+
) -> float: ...
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DifferentInPendingJobsRewardCaculator(RewardCaculator[ClusterInformation]):
|
|
18
|
+
def __call__(
|
|
19
|
+
self,
|
|
20
|
+
prev_extra_information: ClusterInformation,
|
|
21
|
+
current_extra_information: ClusterInformation,
|
|
22
|
+
) -> float:
|
|
23
|
+
prev_not_pending_jobs_count = sum(
|
|
24
|
+
s != Status.Pending for s in prev_extra_information["jobs_status"]
|
|
25
|
+
)
|
|
26
|
+
current_not_pending_jobs_count = sum(
|
|
27
|
+
s != Status.Pending for s in current_extra_information["jobs_status"]
|
|
28
|
+
)
|
|
29
|
+
return current_not_pending_jobs_count - prev_not_pending_jobs_count
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import typing as tp
|
|
2
|
+
import abc
|
|
3
|
+
|
|
4
|
+
from rust_enum import enum, Case
|
|
5
|
+
|
|
6
|
+
from src.envs.cluster_simulator.base.internal.job import Job, JobCollection
|
|
7
|
+
from src.envs.cluster_simulator.base.internal.job import Status as JobStatus
|
|
8
|
+
from src.envs.cluster_simulator.base.internal.machine import Machine, MachineCollection
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
T = tp.TypeVar("T")
|
|
12
|
+
|
|
13
|
+
Machines = tp.TypeVar("Machines", bound=MachineCollection)
|
|
14
|
+
Jobs = tp.TypeVar("Jobs", bound=JobCollection)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@enum
|
|
18
|
+
class ClusterAction:
|
|
19
|
+
SkipTime = Case()
|
|
20
|
+
Schedule = Case(machine=int, job=int)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ClusterABC(tp.Generic[Machines, Jobs], abc.ABC):
|
|
24
|
+
@abc.abstractmethod
|
|
25
|
+
def workload_creator(self, seed: tp.Optional[tp.SupportsFloat] = None) -> Jobs: ...
|
|
26
|
+
|
|
27
|
+
@abc.abstractmethod
|
|
28
|
+
def machine_creator(
|
|
29
|
+
self, seed: tp.Optional[tp.SupportsFloat] = None
|
|
30
|
+
) -> Machines: ...
|
|
31
|
+
|
|
32
|
+
@abc.abstractmethod
|
|
33
|
+
def is_allocation_possible(self, machine: Machine[T], job: Job[T]) -> bool: ...
|
|
34
|
+
|
|
35
|
+
@abc.abstractmethod
|
|
36
|
+
def allocation(self, machine: Machine[T], job: Job[T]) -> None: ...
|
|
37
|
+
|
|
38
|
+
def __init__(self, seed: tp.Optional[tp.SupportsFloat]):
|
|
39
|
+
self._current_tick = 0
|
|
40
|
+
self._machines = self.machine_creator(seed)
|
|
41
|
+
self._jobs = self.workload_creator(seed)
|
|
42
|
+
self._jobs.execute_clock_tick(self._current_tick)
|
|
43
|
+
self._running_job_to_machine: dict[int, int] = {}
|
|
44
|
+
self.logger = logging.getLogger(type(self).__name__)
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def n_jobs(self) -> int:
|
|
48
|
+
return len(self._jobs)
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def n_machines(self) -> int:
|
|
52
|
+
return len(self._machines)
|
|
53
|
+
|
|
54
|
+
def has_completed(self) -> bool:
|
|
55
|
+
n_none_finished_jobs = sum(
|
|
56
|
+
job.status != JobStatus.Completed for job in self._jobs
|
|
57
|
+
)
|
|
58
|
+
self.logger.debug("Number of none completed jobs: %d", n_none_finished_jobs)
|
|
59
|
+
return n_none_finished_jobs == 0
|
|
60
|
+
|
|
61
|
+
def are_all_jobs_executed(self) -> bool:
|
|
62
|
+
arent_executed_jobs = sum(
|
|
63
|
+
job.status in (JobStatus.NotCreated, JobStatus.Pending)
|
|
64
|
+
for job in self._jobs
|
|
65
|
+
)
|
|
66
|
+
self.logger.debug("Number of none completed jobs: %d", arent_executed_jobs)
|
|
67
|
+
return arent_executed_jobs == 0
|
|
68
|
+
|
|
69
|
+
def schedule(self, m_idx: int, j_idx: int) -> bool:
|
|
70
|
+
job = self._jobs[j_idx]
|
|
71
|
+
machine = self._machines[m_idx]
|
|
72
|
+
|
|
73
|
+
if job.status != JobStatus.Pending:
|
|
74
|
+
self.logger.warning(
|
|
75
|
+
"Invalid action: job %d has status '%s' (expected: 'Pending')",
|
|
76
|
+
j_idx,
|
|
77
|
+
job.status.name,
|
|
78
|
+
)
|
|
79
|
+
return False
|
|
80
|
+
|
|
81
|
+
if not self.is_allocation_possible(machine, job):
|
|
82
|
+
self.logger.warning(
|
|
83
|
+
"Schedule rejected: insufficient capacity | machine=%d job=%d",
|
|
84
|
+
m_idx,
|
|
85
|
+
j_idx,
|
|
86
|
+
)
|
|
87
|
+
return False
|
|
88
|
+
|
|
89
|
+
self.allocation(machine, job)
|
|
90
|
+
self.logger.info(
|
|
91
|
+
"Scheduling job %d on machine %d",
|
|
92
|
+
j_idx,
|
|
93
|
+
m_idx,
|
|
94
|
+
)
|
|
95
|
+
job.status = JobStatus.Running
|
|
96
|
+
job.run_time = 1 # Assume that if start running the in next one will finish
|
|
97
|
+
self._running_job_to_machine[m_idx] = j_idx
|
|
98
|
+
self.logger.debug(
|
|
99
|
+
"Running job %d on machine %d",
|
|
100
|
+
j_idx,
|
|
101
|
+
m_idx,
|
|
102
|
+
)
|
|
103
|
+
return True
|
|
104
|
+
|
|
105
|
+
def execute_clock_tick(self) -> None:
|
|
106
|
+
self.logger.info(
|
|
107
|
+
"Executing clock tick: %d → %d",
|
|
108
|
+
self._current_tick,
|
|
109
|
+
self._current_tick + 1,
|
|
110
|
+
)
|
|
111
|
+
self._current_tick += 1
|
|
112
|
+
self._jobs.execute_clock_tick(self._current_tick)
|
|
113
|
+
running_jobs = {
|
|
114
|
+
j_idx
|
|
115
|
+
for j_idx, job in enumerate(iter(self._jobs))
|
|
116
|
+
if job.status != JobStatus.Running
|
|
117
|
+
}
|
|
118
|
+
self._running_job_to_machine = {
|
|
119
|
+
k: v for k, v in self._running_job_to_machine.items() if k in running_jobs
|
|
120
|
+
}
|
|
121
|
+
self._machines.execute_clock_tick()
|
|
122
|
+
|
|
123
|
+
def reset(self, seed: tp.Optional[tp.SupportsFloat]) -> None:
|
|
124
|
+
self._current_tick = 0
|
|
125
|
+
self._jobs = self.workload_creator(seed)
|
|
126
|
+
self._machines.clean_and_reset(seed)
|
|
127
|
+
|
|
128
|
+
def execute(self, action: ClusterAction) -> tp.Optional[bool]:
|
|
129
|
+
match action:
|
|
130
|
+
case ClusterAction.SkipTime():
|
|
131
|
+
return self.execute_clock_tick()
|
|
132
|
+
case ClusterAction.Schedule(machine_idx, job_idx):
|
|
133
|
+
return self.schedule(machine_idx, job_idx)
|
|
134
|
+
case _:
|
|
135
|
+
raise RuntimeError(
|
|
136
|
+
f"Provided command should be {ClusterAction.SkipTime.__class__} or {ClusterAction.Schedule.__class__} and not {type(action).__class__}"
|
|
137
|
+
)
|