DIRACCommon 9.0.0a66__py3-none-any.whl → 9.0.0a67__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DIRACCommon/Core/Utilities/ClassAd/ClassAdLight.py +295 -0
- DIRACCommon/Core/Utilities/ClassAd/__init__.py +1 -0
- DIRACCommon/Core/Utilities/JDL.py +199 -0
- DIRACCommon/Core/Utilities/List.py +127 -0
- DIRACCommon/{Utils → Core/Utilities}/ReturnValues.py +1 -1
- DIRACCommon/Core/Utilities/StateMachine.py +185 -0
- DIRACCommon/Core/Utilities/TimeUtilities.py +259 -0
- DIRACCommon/Core/__init__.py +1 -0
- DIRACCommon/WorkloadManagementSystem/Client/JobState/JobManifest.py +235 -0
- DIRACCommon/WorkloadManagementSystem/Client/JobState/__init__.py +0 -0
- DIRACCommon/WorkloadManagementSystem/Client/JobStatus.py +95 -0
- DIRACCommon/WorkloadManagementSystem/Client/__init__.py +1 -0
- DIRACCommon/WorkloadManagementSystem/DB/JobDBUtils.py +170 -0
- DIRACCommon/WorkloadManagementSystem/DB/__init__.py +1 -0
- DIRACCommon/WorkloadManagementSystem/Utilities/JobModel.py +236 -0
- DIRACCommon/WorkloadManagementSystem/Utilities/JobStatusUtility.py +93 -0
- DIRACCommon/WorkloadManagementSystem/Utilities/ParametricJob.py +179 -0
- DIRACCommon/WorkloadManagementSystem/Utilities/__init__.py +1 -0
- DIRACCommon/WorkloadManagementSystem/__init__.py +1 -0
- {diraccommon-9.0.0a66.dist-info → diraccommon-9.0.0a67.dist-info}/METADATA +9 -4
- diraccommon-9.0.0a67.dist-info/RECORD +25 -0
- diraccommon-9.0.0a66.dist-info/RECORD +0 -7
- /DIRACCommon/{Utils → Core/Utilities}/DErrno.py +0 -0
- /DIRACCommon/{Utils → Core/Utilities}/__init__.py +0 -0
- {diraccommon-9.0.0a66.dist-info → diraccommon-9.0.0a67.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains constants and lists for the possible job states.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from DIRACCommon.Core.Utilities.StateMachine import State, StateMachine
|
|
6
|
+
|
|
7
|
+
# Individual job status names. The empty "#:" markers of the original module
# are kept in front of every constant.

#:
SUBMITTING = "Submitting"
#:
RECEIVED = "Received"
#:
CHECKING = "Checking"
#:
STAGING = "Staging"
#:
SCOUTING = "Scouting"
#:
WAITING = "Waiting"
#:
MATCHED = "Matched"
#: The Rescheduled status is effectively never stored in the DB.
#: It could be considered a "virtual" status, and might even be dropped.
RESCHEDULED = "Rescheduled"
#:
RUNNING = "Running"
#:
STALLED = "Stalled"
#:
COMPLETING = "Completing"
#:
DONE = "Done"
#:
COMPLETED = "Completed"
#:
FAILED = "Failed"
#:
DELETED = "Deleted"
#:
KILLED = "Killed"

#: Possible job states
JOB_STATES = [
    SUBMITTING,
    RECEIVED,
    CHECKING,
    SCOUTING,
    STAGING,
    WAITING,
    MATCHED,
    RESCHEDULED,
    RUNNING,
    STALLED,
    COMPLETING,
    DONE,
    COMPLETED,
    FAILED,
    DELETED,
    KILLED,
]

# Job states reached once the payload work has finished
JOB_FINAL_STATES = [DONE, COMPLETED, FAILED, KILLED]

# WMS-internal job states after which the job object won't be updated
JOB_REALLY_FINAL_STATES = [DELETED]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class JobsStateMachine(StateMachine):
    """State machine listing, for every job status, the statuses it may move to."""

    def __init__(self, state):
        """Create the machine positioned on ``state`` and register the job transitions.

        :param state: status the machine starts from, forwarded to ``StateMachine``
        """
        super().__init__(state)

        # Transition table, keyed by job status.
        # NOTE(review): State's positional arguments look like
        # (level, allowed next states) - confirm against StateMachine.State.
        self.states = {
            DELETED: State(15),  # final state
            KILLED: State(14, [DELETED], defState=KILLED),
            FAILED: State(13, [RESCHEDULED, DELETED], defState=FAILED),
            DONE: State(12, [DELETED], defState=DONE),
            COMPLETED: State(11, [DONE, FAILED], defState=COMPLETED),
            COMPLETING: State(10, [DONE, FAILED, COMPLETED, STALLED, KILLED], defState=COMPLETING),
            STALLED: State(9, [RUNNING, FAILED, KILLED], defState=STALLED),
            RUNNING: State(8, [STALLED, DONE, FAILED, RESCHEDULED, COMPLETING, KILLED, RECEIVED], defState=RUNNING),
            RESCHEDULED: State(7, [WAITING, RECEIVED, DELETED, FAILED, KILLED], defState=RESCHEDULED),
            MATCHED: State(6, [RUNNING, FAILED, RESCHEDULED, KILLED], defState=MATCHED),
            WAITING: State(5, [MATCHED, RESCHEDULED, DELETED, KILLED], defState=WAITING),
            STAGING: State(4, [CHECKING, WAITING, FAILED, KILLED], defState=STAGING),
            SCOUTING: State(3, [CHECKING, FAILED, STALLED, KILLED], defState=SCOUTING),
            CHECKING: State(2, [SCOUTING, STAGING, WAITING, RESCHEDULED, FAILED, DELETED, KILLED], defState=CHECKING),
            RECEIVED: State(1, [SCOUTING, CHECKING, STAGING, WAITING, FAILED, DELETED, KILLED], defState=RECEIVED),
            SUBMITTING: State(0, [RECEIVED, CHECKING, DELETED, KILLED], defState=SUBMITTING),  # initial state
        }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""DIRACCommon WorkloadManagementSystem client utilities"""
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Stateless JobDB utilities extracted from DIRAC for DIRACCommon"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
import zlib
from typing import Callable, TypedDict

from DIRACCommon.Core.Utilities.ReturnValues import S_OK, DOKReturnType, S_ERROR
from DIRACCommon.Core.Utilities.DErrno import EWMSSUBM
from DIRACCommon.WorkloadManagementSystem.Client import JobStatus
from DIRACCommon.WorkloadManagementSystem.Client.JobState.JobManifest import JobManifest, JobManifestConfig
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def compressJDL(jdl):
    """Return the JDL string zlib-compressed and base64-encoded, as text.

    :param jdl: JDL string to compress
    :return: base64 text of the zlib-compressed, encoded JDL
    """
    rawBytes = jdl.encode()
    # -1 is the compression level passed to zlib
    deflated = zlib.compress(rawBytes, -1)
    encoded = base64.b64encode(deflated)
    return encoded.decode()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def extractJDL(compressedJDL):
    """Return the JDL as a string, decompressing it only when needed.

    :param compressedJDL: ``str`` or ``bytes``; either a plain JDL starting with
        ``[`` or the base64/zlib form produced by :func:`compressJDL`
    :return: the JDL as ``str``
    """
    # The starting bracket is guaranteed by JobManager.submitJob; the check is
    # kept to stay backward compatible with JDLs stored uncompressed.
    isBytes = isinstance(compressedJDL, bytes)
    openingBracket = b"[" if isBytes else "["
    if compressedJDL.startswith(openingBracket):
        return compressedJDL.decode() if isBytes else compressedJDL

    inflated = zlib.decompress(base64.b64decode(compressedJDL))
    return inflated.decode()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def fixJDL(jdl: str) -> str:
    """Fix possible lack of brackets in JDL

    The JDL is returned untouched when its first non-blank character is ``[``;
    otherwise the whole string is wrapped in ``[`` and ``]``. An empty or
    whitespace-only JDL is wrapped too instead of raising ``IndexError``.

    :param jdl: JDL string, with or without the enclosing brackets
    :return: JDL string enclosed in brackets
    """
    # startswith() is safe on an empty string, unlike indexing the first character
    if not jdl.strip().startswith("["):
        jdl = "[" + jdl + "]"
    return jdl
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class CheckAndPrepareJobConfig(TypedDict):
    """Dictionary type for defining the information checkAndPrepareJob needs from the CS"""

    # Inserted as the "InputDataModule" attribute when the job JDL does not define one
    inputDataPolicyForVO: str
    # Inserted as the "SoftwareDistModule" attribute when the job JDL does not define one
    softwareDistModuleForVO: str
    # CPU time put in the requirements when the job JDL carries no "CPUTime"
    defaultCPUTimeForOwnerGroup: int
    # Called with the "Platform" list of the job; the "Value" of its result is
    # stored as the "Platforms" requirement. The builtin ``callable`` cannot be
    # subscripted, hence ``typing.Callable``.
    getDIRACPlatform: Callable[[list[str]], DOKReturnType[list[str]]]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def checkAndPrepareJob(
    jobID, classAdJob, classAdReq, owner, ownerGroup, jobAttrs, vo, *, config: CheckAndPrepareJobConfig
):
    """Check the ownership declared in the JDL and fill the job and requirements ClassAds.

    :param jobID: job identifier, only reported back in the error structure
    :param classAdJob: ClassAd of the job; read and completed in place
    :param classAdReq: ClassAd of the job requirements; completed in place
    :param owner: owner of the submitter
    :param ownerGroup: owner group of the submitter
    :param jobAttrs: mapping receiving "Status" and "MinorStatus" on failure
    :param vo: virtual organization of the submitter (may be empty)
    :param config: values and callbacks this function needs from the CS
    :return: ``S_OK()`` on success; the failed result of ``getDIRACPlatform``;
        or an ``S_ERROR`` carrying "JobId", "Status" and "MinorStatus"
    """
    # The JDL values are always overwritten by the submitter IDs below, but
    # comparing them first allows to find out inconsistent client environments.
    ownershipChecks = (
        ("Wrong Owner in JDL", classAdJob.getAttributeString("Owner"), owner),
        ("Wrong Owner Group in JDL", classAdJob.getAttributeString("OwnerGroup"), ownerGroup),
        ("Wrong Virtual Organization in JDL", classAdJob.getAttributeString("VirtualOrganization"), vo),
    )
    error = ""
    for message, declared, expected in ownershipChecks:
        if declared and declared != expected:
            error = message
            break

    # Stamp the submitter identity on the job first, then on the requirements
    for classAd in (classAdJob, classAdReq):
        classAd.insertAttributeString("Owner", owner)
        classAd.insertAttributeString("OwnerGroup", ownerGroup)
        if vo:
            classAd.insertAttributeString("VirtualOrganization", vo)

    # Modules coming from the configuration never override what the JDL defines
    for attribute, configKey in (
        ("InputDataModule", "inputDataPolicyForVO"),
        ("SoftwareDistModule", "softwareDistModuleForVO"),
    ):
        if config[configKey] and not classAdJob.lookupAttribute(attribute):
            classAdJob.insertAttributeString(attribute, config[configKey])

    # priority
    priority = classAdJob.getAttributeInt("Priority")
    classAdReq.insertAttributeInt("UserPriority", 0 if priority is None else priority)

    # CPU time
    cpuTime = classAdJob.getAttributeInt("CPUTime")
    classAdReq.insertAttributeInt(
        "CPUTime", config["defaultCPUTimeForOwnerGroup"] if cpuTime is None else cpuTime
    )

    # platform(s)
    platformList = classAdJob.getListFromExpression("Platform")
    if platformList:
        result = config["getDIRACPlatform"](platformList)
        if not result["OK"]:
            return result
        if result["Value"]:
            classAdReq.insertAttributeVectorString("Platforms", result["Value"])
        else:
            # Replaces any ownership error found above
            error = "OS compatibility info not found"

    if not error:
        return S_OK()

    retVal = S_ERROR(EWMSSUBM, error)
    retVal["JobId"] = jobID
    retVal["Status"] = JobStatus.FAILED
    retVal["MinorStatus"] = error

    jobAttrs["Status"] = JobStatus.FAILED
    jobAttrs["MinorStatus"] = error
    return retVal
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def checkAndAddOwner(
    jdl: str, owner: str, ownerGroup: str, *, job_manifest_config: JobManifestConfig
) -> DOKReturnType[JobManifest]:
    """Load a JDL into a job manifest, set its owner information and check it.

    :param jdl: JDL to load into the manifest
    :param owner: value stored under the "Owner" option
    :param ownerGroup: value stored under the "OwnerGroup" option
    :param job_manifest_config: configuration handed to ``JobManifest.check``
    :return: ``S_OK(manifest)``, or the failed result of ``load`` / ``check``
    """
    manifest = JobManifest()

    loaded = manifest.load(jdl)
    if not loaded["OK"]:
        return loaded

    manifest.setOptionsFromDict({"Owner": owner, "OwnerGroup": ownerGroup})

    checked = manifest.check(config=job_manifest_config)
    return S_OK(manifest) if checked["OK"] else checked
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def createJDLWithInitialStatus(
    classAdJob, classAdReq, jdl2DBParameters, jobAttrs, initialStatus, initialMinorStatus, *, modern=False
):
    """Fill ``jobAttrs`` from the job ClassAd and return the job JDL with its requirements.

    :param classAdJob: ClassAd of the job; receives the "JobRequirements" attribute
    :param classAdReq: ClassAd of the requirements, serialised into the job ClassAd
    :param jdl2DBParameters: names of the JDL attributes to copy into ``jobAttrs``
    :param jobAttrs: mapping of job attributes, updated in place
    :param initialStatus: value stored as "Status"
    :param initialMinorStatus: value stored as "MinorStatus"
    :param modern: if True, store boolean instead of string for VerifiedFlag (used by diracx only)
    :return: the JDL of ``classAdJob``
    """
    userPriority = classAdJob.getAttributeInt("Priority")
    jobAttrs["UserPriority"] = 0 if userPriority is None else userPriority

    # Only non-empty values are copied: defaults are set by the DB.
    for jdlName in jdl2DBParameters:
        value = classAdJob.getAttributeString(jdlName)
        if value:
            jobAttrs[jdlName] = value

    site = classAdJob.getAttributeString("Site")
    if site:
        # A comma means that several sites were requested
        jobAttrs["Site"] = "Multiple" if "," in site else site

    if modern:
        jobAttrs["VerifiedFlag"] = True
    else:
        jobAttrs["VerifiedFlag"] = "True"
    jobAttrs["Status"] = initialStatus
    jobAttrs["MinorStatus"] = initialMinorStatus

    # NOTE(review): insertAttributeInt receives a JDL string here - presumably
    # to store the nested expression unquoted; confirm against the ClassAd class.
    classAdJob.insertAttributeInt("JobRequirements", classAdReq.asJDL())

    return classAdJob.asJDL()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""DIRACCommon WorkloadManagementSystem DB utilities"""
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
""" This module contains the JobModel class, which is used to validate the job description """
|
|
2
|
+
|
|
3
|
+
# pylint: disable=no-self-argument, no-self-use, invalid-name, missing-function-docstring
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from typing import Annotated, Any, Callable, ClassVar, Self, TypeAlias, TypedDict
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, BeforeValidator, ConfigDict, field_validator, model_validator
|
|
9
|
+
|
|
10
|
+
from DIRACCommon.Core.Utilities.ReturnValues import DErrorReturnType
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# HACK: Convert appropriate iterables into sets
|
|
14
|
+
def default_set_validator(value):
    """Turn ``None`` and non-string iterables into a set; pass anything else through.

    :param value: raw value received for a set-typed field
    :return: an empty set for ``None``, ``set(value)`` for iterables other than
        ``str`` / ``bytes`` / ``bytearray``, and ``value`` itself otherwise
    """
    if value is None:
        return set()
    # Strings and byte strings are iterable but must not be split into characters
    if isinstance(value, (str, bytes, bytearray)) or not isinstance(value, Iterable):
        return value
    return set(value)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
CoercibleSetStr: TypeAlias = Annotated[set[str], BeforeValidator(default_set_validator)]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class BaseJobDescriptionModelConfg(TypedDict):
    """Dictionary type for defining the information JobDescriptionModel needs from the CS"""

    # Defaults injected when the job description omits the field
    cpuTime: int
    priority: int

    # Bounds and allowed values enforced by the model validators
    minCPUTime: int
    maxCPUTime: int
    allowedJobTypes: list[str]
    maxInputDataFiles: int
    minNumberOfProcessors: int
    maxNumberOfProcessors: int
    minPriority: int
    maxPriority: int
    possibleLogLevels: list[str]
    # NOTE(review): the model reads "OK", "Message" and "Value" from this entry,
    # so a success-or-error return type looks intended - confirm.
    sites: DErrorReturnType[list[str]]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class BaseJobDescriptionModel(BaseModel):
    """Base model for the job description (not parametric)

    Defaults and bounds come from the dictionary returned by ``_config_builder``,
    which subclasses must provide. Validation raises ``ValueError`` for values
    that are out of bounds or malformed.
    """

    model_config = ConfigDict(validate_assignment=True)

    # This must be overridden in subclasses
    _config_builder: ClassVar[Callable[[], BaseJobDescriptionModelConfg] | None] = None

    @model_validator(mode="before")
    def injectDefaultValues(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Fill ``cpuTime`` and ``priority`` from the configuration when missing.

        :raises NotImplementedError: if the class defines no ``_config_builder``
        """
        if cls._config_builder is None:
            raise NotImplementedError("You must define a _config_builder class attribute")
        config = cls._config_builder()
        values.setdefault("cpuTime", config["cpuTime"])
        values.setdefault("priority", config["priority"])
        return values

    arguments: str = ""
    bannedSites: CoercibleSetStr = set()
    # TODO: This should use a field factory
    cpuTime: int
    executable: str
    executionEnvironment: dict = None
    gridCE: str = ""
    inputSandbox: CoercibleSetStr = set()
    inputData: CoercibleSetStr = set()
    inputDataPolicy: str = ""
    jobConfigArgs: str = ""
    jobGroup: str = ""
    jobType: str = "User"
    jobName: str = "Name"
    # TODO: This should be an StrEnum
    logLevel: str = "INFO"
    # TODO: This can't be None with this type hint
    maxNumberOfProcessors: int = None
    minNumberOfProcessors: int = 1
    outputData: CoercibleSetStr = set()
    outputPath: str = ""
    outputSandbox: CoercibleSetStr = set()
    outputSE: str = ""
    platform: str = ""
    # TODO: This should use a field factory
    priority: int
    sites: CoercibleSetStr = set()
    stderr: str = "std.err"
    stdout: str = "std.out"
    tags: CoercibleSetStr = set()
    extraFields: dict[str, Any] = {}

    @field_validator("cpuTime")
    def checkCPUTimeBounds(cls, v):
        """Reject a ``cpuTime`` outside [minCPUTime, maxCPUTime]."""
        # Build the configuration once instead of once per bound
        config = cls._config_builder()
        minCPUTime = config["minCPUTime"]
        maxCPUTime = config["maxCPUTime"]
        if not minCPUTime <= v <= maxCPUTime:
            raise ValueError(f"cpuTime out of bounds (must be between {minCPUTime} and {maxCPUTime})")
        return v

    @field_validator("executable")
    def checkExecutableIsNotAnEmptyString(cls, v: str):
        """Reject an empty ``executable``."""
        if not v:
            raise ValueError("executable must not be an empty string")
        return v

    @field_validator("jobType")
    def checkJobTypeIsAllowed(cls, v: str):
        """Reject a ``jobType`` that is not listed in ``allowedJobTypes``."""
        allowedTypes = cls._config_builder()["allowedJobTypes"]
        if v not in allowedTypes:
            raise ValueError(f"jobType '{v}' is not allowed for this kind of user (must be in {allowedTypes})")
        return v

    @field_validator("inputData")
    def checkInputDataDoesntContainDoubleSlashes(cls, v):
        """Reject input data entries containing ``//``."""
        if v:
            for lfn in v:
                if "//" in lfn:
                    raise ValueError("Input data contains //")
        return v

    @field_validator("inputData")
    def addLFNPrefixIfStringStartsWithASlash(cls, v: set[str]):
        """Strip the entries, drop empty ones and prefix those starting with ``/`` with ``LFN:``.

        Every remaining entry must then start with ``LFN:/``.
        """
        if v:
            v = {lfn.strip() for lfn in v if lfn.strip()}
            v = {f"LFN:{lfn}" if lfn.startswith("/") else lfn for lfn in v}

            for lfn in v:
                if not lfn.startswith("LFN:/"):
                    raise ValueError("Input data files must start with LFN:/")
        return v

    @model_validator(mode="after")
    def checkNumberOfInputDataFiles(self) -> Self:
        """Reject "User" jobs with more than ``maxInputDataFiles`` input files."""
        if self.inputData:
            # Called through the class, like in the field validators, so that
            # the builder is not bound to the instance.
            maxInputDataFiles = type(self)._config_builder()["maxInputDataFiles"]
            # Strict comparison: exactly maxInputDataFiles files is "at most" the limit
            if self.jobType == "User" and len(self.inputData) > maxInputDataFiles:
                raise ValueError(f"inputData contains too many files (must contain at most {maxInputDataFiles})")
        return self

    @field_validator("inputSandbox")
    def checkLFNSandboxesAreWellFormated(cls, v: set[str]):
        """Reject sandbox entries starting with ``LFN:`` but not with ``LFN:/``."""
        for inputSandbox in v:
            if inputSandbox.startswith("LFN:") and not inputSandbox.startswith("LFN:/"):
                raise ValueError("LFN files must start by LFN:/")
        return v

    @field_validator("logLevel")
    def checkLogLevelIsValid(cls, v: str):
        """Upper-case ``logLevel`` and reject it if not in ``possibleLogLevels``."""
        v = v.upper()
        possibleLogLevels = cls._config_builder()["possibleLogLevels"]
        if v not in possibleLogLevels:
            raise ValueError(f"Log level {v} not in {possibleLogLevels}")
        return v

    @field_validator("minNumberOfProcessors")
    def checkMinNumberOfProcessorsBounds(cls, v):
        """Reject a ``minNumberOfProcessors`` outside the configured bounds."""
        config = cls._config_builder()
        minNumberOfProcessors = config["minNumberOfProcessors"]
        maxNumberOfProcessors = config["maxNumberOfProcessors"]
        if not minNumberOfProcessors <= v <= maxNumberOfProcessors:
            raise ValueError(
                f"minNumberOfProcessors out of bounds (must be between {minNumberOfProcessors} and {maxNumberOfProcessors})"
            )
        return v

    @field_validator("maxNumberOfProcessors")
    def checkMaxNumberOfProcessorsBounds(cls, v):
        """Reject a ``maxNumberOfProcessors`` outside the configured bounds."""
        config = cls._config_builder()
        minNumberOfProcessors = config["minNumberOfProcessors"]
        maxNumberOfProcessors = config["maxNumberOfProcessors"]
        if not minNumberOfProcessors <= v <= maxNumberOfProcessors:
            raise ValueError(
                f"maxNumberOfProcessors out of bounds (must be between {minNumberOfProcessors} and {maxNumberOfProcessors})"
            )
        return v

    @model_validator(mode="after")
    def checkThatMaxNumberOfProcessorsIsGreaterThanMinNumberOfProcessors(self) -> Self:
        """Reject a set ``maxNumberOfProcessors`` lower than ``minNumberOfProcessors``."""
        if self.maxNumberOfProcessors and self.maxNumberOfProcessors < self.minNumberOfProcessors:
            raise ValueError("maxNumberOfProcessors must be greater than minNumberOfProcessors")
        return self

    @model_validator(mode="after")
    def addTagsDependingOnNumberOfProcessors(self) -> Self:
        """Add the "<N>Processors" and "MultiProcessor" tags derived from the processor counts."""
        if self.minNumberOfProcessors == self.maxNumberOfProcessors:
            self.tags.add(f"{self.minNumberOfProcessors}Processors")
        if self.minNumberOfProcessors > 1:
            self.tags.add("MultiProcessor")
        return self

    @field_validator("sites")
    def checkSites(cls, v: set[str]):
        """Reject sites that are neither in the configured site list nor "ANY"."""
        if v:
            res = cls._config_builder()["sites"]
            if not res["OK"]:
                raise ValueError(res["Message"])
            invalidSites = v - set(res["Value"]).union({"ANY"})
            if invalidSites:
                raise ValueError(f"Invalid sites: {' '.join(invalidSites)}")
        return v

    @model_validator(mode="after")
    def checkThatSitesAndBannedSitesAreNotMutuallyExclusive(self) -> Self:
        """Remove the banned sites from ``sites`` and fail if no site is left.

        When both sets are non-empty, ``bannedSites`` is emptied in the process.
        """
        if self.sites and self.bannedSites:
            # Both sets are updated in place
            self.sites.difference_update(self.bannedSites)
            self.bannedSites.clear()
            if not self.sites:
                raise ValueError("sites and bannedSites are mutually exclusive")
        return self

    @field_validator("priority")
    def checkPriorityBounds(cls, v):
        """Reject a ``priority`` outside [minPriority, maxPriority]."""
        config = cls._config_builder()
        minPriority = config["minPriority"]
        maxPriority = config["maxPriority"]
        if not minPriority <= v <= maxPriority:
            raise ValueError(f"priority out of bounds (must be between {minPriority} and {maxPriority})")
        return v
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class JobDescriptionModel(BaseJobDescriptionModel):
    """Model for the job description (non parametric job with user credentials, i.e server side)"""

    owner: str
    ownerGroup: str
    vo: str

    @model_validator(mode="after")
    def checkLFNMatchesREGEX(self) -> Self:
        """Require every input data entry to start with ``LFN:/<vo>/``."""
        voPrefix = f"LFN:/{self.vo}/"
        if self.inputData and not all(lfn.startswith(voPrefix) for lfn in self.inputData):
            raise ValueError(f"Input data not correctly specified (must start with LFN:/{self.vo}/)")
        return self
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Stateless job status utility functions"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from DIRACCommon.Core.Utilities.ReturnValues import S_OK
|
|
9
|
+
from DIRACCommon.Core.Utilities.TimeUtilities import toEpoch, fromString
|
|
10
|
+
from DIRACCommon.WorkloadManagementSystem.Client.JobStatus import RUNNING, JOB_FINAL_STATES, JobsStateMachine
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def getStartAndEndTime(startTime, endTime, updateTimes, timeStamps, statusDict):
    """Get start and end times from job status updates

    :param startTime: current start time
    :param endTime: current end time
    :param updateTimes: list of update times
    :param timeStamps: list of (timestamp, status) tuples
    :param statusDict: dictionary mapping update times to status dictionaries
    :return: tuple of (newStartTime, newEndTime); the given times are returned
        unchanged when ``updateTimes`` is empty
    """
    # Nothing to inspect: indexing the first update would raise IndexError
    if not updateTimes:
        return startTime, endTime

    # Status in force at the first update: the status of the last listed
    # timestamp that is not later than that update
    newStat = ""
    firstUpdate = toEpoch(fromString(updateTimes[0]))
    for ts, st in timeStamps:
        if firstUpdate >= ts:
            newStat = st

    # Pick up start and end times from all updates
    for updTime in updateTimes:
        sDict = statusDict[updTime]
        # An update without "Status" keeps the previous status
        newStat = sDict.get("Status", newStat)

        if not startTime and newStat == RUNNING:
            # Pick up the start date when the job starts running if not existing
            startTime = updTime
        elif not endTime and newStat in JOB_FINAL_STATES:
            # Pick up the end time when the job is in a final status
            endTime = updTime

    return startTime, endTime
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def getNewStatus(
    jobID: int,
    updateTimes: list[datetime],
    lastTime: datetime,
    statusDict: dict[datetime, Any],
    currentStatus,
    force: bool,
    log,
):
    """Work out the job status resulting from a series of status updates

    :param jobID: job ID, only used in the error log message
    :param updateTimes: list of update times
    :param lastTime: last update time; earlier updates are ignored
    :param statusDict: dictionary mapping update times to status dictionaries;
        an entry is amended in place when the state machine overrides it
    :param currentStatus: current job status
    :param force: whether to force status update without state machine validation
    :param log: logger object
    :return: S_OK((status, minor, application)) or S_ERROR
    """
    status = minor = application = ""

    # Get the last status values looping on the most recent update times in chronological order
    recentUpdates = [dt for dt in updateTimes if dt >= lastTime]
    for updTime in recentUpdates:
        sDict = statusDict[updTime]
        log.debug(f"\tTime {updTime} - Statuses {str(sDict)}")
        # NOTE(review): when ``force`` is set ``currentStatus`` is never advanced,
        # so a later update without a "Status" key falls back to the status the
        # job had on entry - confirm this is intended.
        status = sDict.get("Status", currentStatus)

        # evaluate the state machine if the status is changing
        if not force and status != currentStatus:
            res = JobsStateMachine(currentStatus).getNextState(status)
            if not res["OK"]:
                return res
            newStat = res["Value"]

            # If the JobsStateMachine does not accept the candidate, don't update
            if newStat != status:
                # keeping the same status
                log.error(
                    f"Job Status Error: {jobID} can't move from {currentStatus} to {status}: using {newStat}",
                )
                status = newStat
                sDict["Status"] = newStat
                # Change the source to indicate this is not what was requested
                sDict["Source"] = sDict.get("Source", "") + "(SM)"

            # at this stage status == newStat. Set currentStatus to this new status
            currentStatus = newStat

        minor = sDict.get("MinorStatus", minor)
        application = sDict.get("ApplicationStatus", application)

    return S_OK((status, minor, application))
|