DIRAC 9.0.0a66__py3-none-any.whl → 9.0.0a67__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DIRAC/ConfigurationSystem/Client/VOMS2CSSynchronizer.py +1 -1
- DIRAC/Core/Security/IAMService.py +4 -3
- DIRAC/Core/Utilities/ClassAd/ClassAdLight.py +4 -290
- DIRAC/Core/Utilities/DErrno.py +1 -1
- DIRAC/Core/Utilities/JDL.py +1 -195
- DIRAC/Core/Utilities/List.py +1 -127
- DIRAC/Core/Utilities/ReturnValues.py +2 -2
- DIRAC/Core/Utilities/StateMachine.py +12 -178
- DIRAC/Core/Utilities/TimeUtilities.py +10 -253
- DIRAC/Core/Utilities/test/Test_JDL.py +0 -3
- DIRAC/DataManagementSystem/DB/FTS3DB.py +3 -0
- DIRAC/RequestManagementSystem/DB/test/RMSTestScenari.py +2 -0
- DIRAC/Resources/Catalog/RucioFileCatalogClient.py +1 -1
- DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +2 -1
- DIRAC/Workflow/Modules/test/Test_Modules.py +5 -0
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py +2 -0
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py +1 -0
- DIRAC/WorkloadManagementSystem/Client/JobState/JobManifest.py +32 -261
- DIRAC/WorkloadManagementSystem/Client/JobStatus.py +8 -93
- DIRAC/WorkloadManagementSystem/DB/JobDBUtils.py +18 -147
- DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +4 -2
- DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +21 -5
- DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapperTemplate.py +4 -0
- DIRAC/WorkloadManagementSystem/Utilities/JobModel.py +28 -199
- DIRAC/WorkloadManagementSystem/Utilities/JobStatusUtility.py +1 -63
- DIRAC/WorkloadManagementSystem/Utilities/ParametricJob.py +7 -171
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobModel.py +1 -5
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_ParametricJob.py +45 -128
- {dirac-9.0.0a66.dist-info → dirac-9.0.0a67.dist-info}/METADATA +2 -2
- {dirac-9.0.0a66.dist-info → dirac-9.0.0a67.dist-info}/RECORD +34 -36
- DIRAC/Core/Utilities/test/Test_List.py +0 -150
- DIRAC/Core/Utilities/test/Test_Time.py +0 -88
- {dirac-9.0.0a66.dist-info → dirac-9.0.0a67.dist-info}/WHEEL +0 -0
- {dirac-9.0.0a66.dist-info → dirac-9.0.0a67.dist-info}/entry_points.txt +0 -0
- {dirac-9.0.0a66.dist-info → dirac-9.0.0a67.dist-info}/licenses/LICENSE +0 -0
- {dirac-9.0.0a66.dist-info → dirac-9.0.0a67.dist-info}/top_level.txt +0 -0
|
@@ -1,266 +1,37 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
from DIRAC.Core.Utilities import List
|
|
5
|
-
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
|
|
6
|
-
from DIRAC.Core.Utilities.JDL import loadJDLAsCFG, dumpCFGAsJDL
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class JobManifest:
|
|
10
|
-
def __init__(self, manifest=""):
|
|
11
|
-
self.__manifest = CFG()
|
|
12
|
-
self.__dirty = False
|
|
13
|
-
self.__ops = False
|
|
14
|
-
if manifest:
|
|
15
|
-
result = self.load(manifest)
|
|
16
|
-
if not result["OK"]:
|
|
17
|
-
raise Exception(result["Message"])
|
|
18
|
-
|
|
19
|
-
def isDirty(self):
|
|
20
|
-
return self.__dirty
|
|
21
|
-
|
|
22
|
-
def setDirty(self):
|
|
23
|
-
self.__dirty = True
|
|
24
|
-
|
|
25
|
-
def clearDirty(self):
|
|
26
|
-
self.__dirty = False
|
|
27
|
-
|
|
28
|
-
def load(self, dataString):
|
|
29
|
-
"""
|
|
30
|
-
Auto discover format type based on [ .. ] of JDL
|
|
31
|
-
"""
|
|
32
|
-
dataString = dataString.strip()
|
|
33
|
-
if dataString[0] == "[" and dataString[-1] == "]":
|
|
34
|
-
return self.loadJDL(dataString)
|
|
35
|
-
else:
|
|
36
|
-
return self.loadCFG(dataString)
|
|
37
|
-
|
|
38
|
-
def loadJDL(self, jdlString):
|
|
39
|
-
"""
|
|
40
|
-
Load job manifest from JDL format
|
|
41
|
-
"""
|
|
42
|
-
result = loadJDLAsCFG(jdlString.strip())
|
|
43
|
-
if not result["OK"]:
|
|
44
|
-
self.__manifest = CFG()
|
|
45
|
-
return result
|
|
46
|
-
self.__manifest = result["Value"][0]
|
|
47
|
-
return S_OK()
|
|
48
|
-
|
|
49
|
-
def loadCFG(self, cfgString):
|
|
50
|
-
"""
|
|
51
|
-
Load job manifest from CFG format
|
|
52
|
-
"""
|
|
53
|
-
try:
|
|
54
|
-
self.__manifest.loadFromBuffer(cfgString)
|
|
55
|
-
except Exception as e:
|
|
56
|
-
return S_ERROR(f"Can't load manifest from cfg: {str(e)}")
|
|
57
|
-
return S_OK()
|
|
58
|
-
|
|
59
|
-
def dumpAsCFG(self):
|
|
60
|
-
return str(self.__manifest)
|
|
61
|
-
|
|
62
|
-
def getAsCFG(self):
|
|
63
|
-
return self.__manifest.clone()
|
|
64
|
-
|
|
65
|
-
def dumpAsJDL(self):
|
|
66
|
-
return dumpCFGAsJDL(self.__manifest)
|
|
67
|
-
|
|
68
|
-
def __getCSValue(self, varName, defaultVal=None):
|
|
69
|
-
if not self.__ops:
|
|
70
|
-
self.__ops = Operations(group=self.__manifest["OwnerGroup"])
|
|
71
|
-
if varName[0] != "/":
|
|
72
|
-
varName = f"JobDescription/{varName}"
|
|
73
|
-
return self.__ops.getValue(varName, defaultVal)
|
|
74
|
-
|
|
75
|
-
def __checkNumericalVar(self, varName, defaultVal, minVal, maxVal):
|
|
76
|
-
"""
|
|
77
|
-
Check a numerical var
|
|
78
|
-
"""
|
|
79
|
-
initialVal = False
|
|
80
|
-
if varName not in self.__manifest:
|
|
81
|
-
varValue = self.__getCSValue(f"Default{varName}", defaultVal)
|
|
82
|
-
else:
|
|
83
|
-
varValue = self.__manifest[varName]
|
|
84
|
-
initialVal = varValue
|
|
85
|
-
try:
|
|
86
|
-
varValue = int(varValue)
|
|
87
|
-
except ValueError:
|
|
88
|
-
return S_ERROR(f"{varName} must be a number")
|
|
89
|
-
minVal = self.__getCSValue(f"Min{varName}", minVal)
|
|
90
|
-
maxVal = self.__getCSValue(f"Max{varName}", maxVal)
|
|
91
|
-
varValue = max(minVal, min(varValue, maxVal))
|
|
92
|
-
if initialVal != varValue:
|
|
93
|
-
self.__manifest.setOption(varName, varValue)
|
|
94
|
-
return S_OK(varValue)
|
|
95
|
-
|
|
96
|
-
def __checkChoiceVar(self, varName, defaultVal, choices):
|
|
97
|
-
"""
|
|
98
|
-
Check a choice var
|
|
99
|
-
"""
|
|
100
|
-
initialVal = False
|
|
101
|
-
if varName not in self.__manifest:
|
|
102
|
-
varValue = self.__getCSValue(f"Default{varName}", defaultVal)
|
|
103
|
-
else:
|
|
104
|
-
varValue = self.__manifest[varName]
|
|
105
|
-
initialVal = varValue
|
|
106
|
-
if varValue not in self.__getCSValue(f"Choices{varName}", choices):
|
|
107
|
-
return S_ERROR(f"{varValue} is not a valid value for {varName}")
|
|
108
|
-
if initialVal != varValue:
|
|
109
|
-
self.__manifest.setOption(varName, varValue)
|
|
110
|
-
return S_OK(varValue)
|
|
111
|
-
|
|
112
|
-
def __checkMultiChoice(self, varName, choices):
|
|
113
|
-
"""
|
|
114
|
-
Check a multi choice var
|
|
115
|
-
"""
|
|
116
|
-
initialVal = False
|
|
117
|
-
if varName not in self.__manifest:
|
|
118
|
-
return S_OK()
|
|
119
|
-
else:
|
|
120
|
-
varValue = self.__manifest[varName]
|
|
121
|
-
initialVal = varValue
|
|
122
|
-
choices = self.__getCSValue(f"Choices{varName}", choices)
|
|
123
|
-
for v in List.fromChar(varValue):
|
|
124
|
-
if v not in choices:
|
|
125
|
-
return S_ERROR(f"{v} is not a valid value for {varName}")
|
|
126
|
-
if initialVal != varValue:
|
|
127
|
-
self.__manifest.setOption(varName, varValue)
|
|
128
|
-
return S_OK(varValue)
|
|
3
|
+
from DIRACCommon.WorkloadManagementSystem.Client.JobState.JobManifest import * # noqa: F401, F403
|
|
129
4
|
|
|
130
|
-
|
|
131
|
-
"""
|
|
132
|
-
Check Maximum Number of Input Data files allowed
|
|
133
|
-
"""
|
|
134
|
-
varName = "InputData"
|
|
135
|
-
if varName not in self.__manifest:
|
|
136
|
-
return S_OK()
|
|
137
|
-
varValue = self.__manifest[varName]
|
|
138
|
-
if len(List.fromChar(varValue)) > maxNumber:
|
|
139
|
-
return S_ERROR(
|
|
140
|
-
"Number of Input Data Files (%s) greater than current limit: %s"
|
|
141
|
-
% (len(List.fromChar(varValue)), maxNumber)
|
|
142
|
-
)
|
|
143
|
-
return S_OK()
|
|
144
|
-
|
|
145
|
-
def __contains__(self, key):
|
|
146
|
-
"""Check if the manifest has the required key"""
|
|
147
|
-
return key in self.__manifest
|
|
5
|
+
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
|
|
148
6
|
|
|
149
|
-
def setOptionsFromDict(self, varDict):
|
|
150
|
-
for k in sorted(varDict):
|
|
151
|
-
self.setOption(k, varDict[k])
|
|
152
7
|
|
|
8
|
+
def makeJobManifestConfig(ownerGroup: str) -> JobManifestConfig:
|
|
9
|
+
ops = Operations(group=ownerGroup)
|
|
10
|
+
|
|
11
|
+
allowedJobTypesForGroup = ops.getValue(
|
|
12
|
+
"JobDescription/ChoicesJobType",
|
|
13
|
+
ops.getValue("JobDescription/AllowedJobTypes", ["User", "Test", "Hospital"])
|
|
14
|
+
+ ops.getValue("Transformations/DataProcessing", []),
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
return {
|
|
18
|
+
"defaultForGroup": {
|
|
19
|
+
"CPUTime": ops.getValue("JobDescription/DefaultCPUTime", 86400),
|
|
20
|
+
"Priority": ops.getValue("JobDescription/DefaultPriority", 1),
|
|
21
|
+
},
|
|
22
|
+
"minForGroup": {
|
|
23
|
+
"CPUTime": ops.getValue("JobDescription/MinCPUTime", 100),
|
|
24
|
+
"Priority": ops.getValue("JobDescription/MinPriority", 0),
|
|
25
|
+
},
|
|
26
|
+
"maxForGroup": {
|
|
27
|
+
"CPUTime": ops.getValue("JobDescription/MaxCPUTime", 500000),
|
|
28
|
+
"Priority": ops.getValue("JobDescription/MaxPriority", 10),
|
|
29
|
+
},
|
|
30
|
+
"allowedJobTypesForGroup": allowedJobTypesForGroup,
|
|
31
|
+
"maxInputData": Operations().getValue("JobDescription/MaxInputData", 500),
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class JobManifest(JobManifest): # noqa: F405 pylint: disable=function-redefined
|
|
153
36
|
def check(self):
|
|
154
|
-
"""
|
|
155
|
-
Check that the manifest is OK
|
|
156
|
-
"""
|
|
157
|
-
for k in ["Owner", "OwnerGroup"]:
|
|
158
|
-
if k not in self.__manifest:
|
|
159
|
-
return S_ERROR(f"Missing var {k} in manifest")
|
|
160
|
-
|
|
161
|
-
# Check CPUTime
|
|
162
|
-
result = self.__checkNumericalVar("CPUTime", 86400, 100, 500000)
|
|
163
|
-
if not result["OK"]:
|
|
164
|
-
return result
|
|
165
|
-
|
|
166
|
-
result = self.__checkNumericalVar("Priority", 1, 0, 10)
|
|
167
|
-
if not result["OK"]:
|
|
168
|
-
return result
|
|
169
|
-
|
|
170
|
-
maxInputData = Operations().getValue("JobDescription/MaxInputData", 500)
|
|
171
|
-
result = self.__checkMaxInputData(maxInputData)
|
|
172
|
-
if not result["OK"]:
|
|
173
|
-
return result
|
|
174
|
-
|
|
175
|
-
operation = Operations(group=self.__manifest["OwnerGroup"])
|
|
176
|
-
allowedJobTypes = operation.getValue("JobDescription/AllowedJobTypes", ["User", "Test", "Hospital"])
|
|
177
|
-
transformationTypes = operation.getValue("Transformations/DataProcessing", [])
|
|
178
|
-
result = self.__checkMultiChoice("JobType", allowedJobTypes + transformationTypes)
|
|
179
|
-
if not result["OK"]:
|
|
180
|
-
return result
|
|
181
|
-
return S_OK()
|
|
182
|
-
|
|
183
|
-
def createSection(self, secName, contents=False):
|
|
184
|
-
if secName not in self.__manifest:
|
|
185
|
-
if contents and not isinstance(contents, CFG):
|
|
186
|
-
return S_ERROR(f"Contents for section {secName} is not a cfg object")
|
|
187
|
-
self.__dirty = True
|
|
188
|
-
return S_OK(self.__manifest.createNewSection(secName, contents=contents))
|
|
189
|
-
return S_ERROR(f"Section {secName} already exists")
|
|
190
|
-
|
|
191
|
-
def getSection(self, secName):
|
|
192
|
-
self.__dirty = True
|
|
193
|
-
if secName not in self.__manifest:
|
|
194
|
-
return S_ERROR(f"{secName} does not exist")
|
|
195
|
-
sec = self.__manifest[secName]
|
|
196
|
-
if not sec:
|
|
197
|
-
return S_ERROR(f"{secName} section empty")
|
|
198
|
-
return S_OK(sec)
|
|
199
|
-
|
|
200
|
-
def setSectionContents(self, secName, contents):
|
|
201
|
-
if contents and not isinstance(contents, CFG):
|
|
202
|
-
return S_ERROR(f"Contents for section {secName} is not a cfg object")
|
|
203
|
-
self.__dirty = True
|
|
204
|
-
if secName in self.__manifest:
|
|
205
|
-
self.__manifest[secName].reset()
|
|
206
|
-
self.__manifest[secName].mergeWith(contents)
|
|
207
|
-
else:
|
|
208
|
-
self.__manifest.createNewSection(secName, contents=contents)
|
|
209
|
-
|
|
210
|
-
def setOption(self, varName, varValue):
|
|
211
|
-
"""
|
|
212
|
-
Set a var in job manifest
|
|
213
|
-
"""
|
|
214
|
-
self.__dirty = True
|
|
215
|
-
levels = List.fromChar(varName, "/")
|
|
216
|
-
cfg = self.__manifest
|
|
217
|
-
for l in levels[:-1]:
|
|
218
|
-
if l not in cfg:
|
|
219
|
-
cfg.createNewSection(l)
|
|
220
|
-
cfg = cfg[l]
|
|
221
|
-
cfg.setOption(levels[-1], varValue)
|
|
222
|
-
|
|
223
|
-
def remove(self, opName):
|
|
224
|
-
levels = List.fromChar(opName, "/")
|
|
225
|
-
cfg = self.__manifest
|
|
226
|
-
for l in levels[:-1]:
|
|
227
|
-
if l not in cfg:
|
|
228
|
-
return S_ERROR(f"{opName} does not exist")
|
|
229
|
-
cfg = cfg[l]
|
|
230
|
-
if cfg.deleteKey(levels[-1]):
|
|
231
|
-
self.__dirty = True
|
|
232
|
-
return S_OK()
|
|
233
|
-
return S_ERROR(f"{opName} does not exist")
|
|
234
|
-
|
|
235
|
-
def getOption(self, varName, defaultValue=None):
|
|
236
|
-
"""
|
|
237
|
-
Get a variable from the job manifest
|
|
238
|
-
"""
|
|
239
|
-
cfg = self.__manifest
|
|
240
|
-
return cfg.getOption(varName, defaultValue)
|
|
241
|
-
|
|
242
|
-
def getOptionList(self, section=""):
|
|
243
|
-
"""
|
|
244
|
-
Get a list of variables in a section of the job manifest
|
|
245
|
-
"""
|
|
246
|
-
cfg = self.__manifest.getRecursive(section)
|
|
247
|
-
if not cfg or "value" not in cfg:
|
|
248
|
-
return []
|
|
249
|
-
cfg = cfg["value"]
|
|
250
|
-
return cfg.listOptions()
|
|
251
|
-
|
|
252
|
-
def isOption(self, opName):
|
|
253
|
-
"""
|
|
254
|
-
Check if it is a valid option
|
|
255
|
-
"""
|
|
256
|
-
return self.__manifest.isOption(opName)
|
|
257
|
-
|
|
258
|
-
def getSectionList(self, section=""):
|
|
259
|
-
"""
|
|
260
|
-
Get a list of sections in the job manifest
|
|
261
|
-
"""
|
|
262
|
-
cfg = self.__manifest.getRecursive(section)
|
|
263
|
-
if not cfg or "value" not in cfg:
|
|
264
|
-
return []
|
|
265
|
-
cfg = cfg["value"]
|
|
266
|
-
return cfg.listSections()
|
|
37
|
+
return super().check(config=makeJobManifestConfig(self.__manifest["OwnerGroup"]))
|
|
@@ -1,95 +1,10 @@
|
|
|
1
|
-
"""
|
|
2
|
-
This module contains constants and lists for the possible job states.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from DIRAC.Core.Utilities.StateMachine import State, StateMachine
|
|
6
|
-
|
|
7
|
-
#:
|
|
8
|
-
SUBMITTING = "Submitting"
|
|
9
|
-
#:
|
|
10
|
-
RECEIVED = "Received"
|
|
11
|
-
#:
|
|
12
|
-
CHECKING = "Checking"
|
|
13
|
-
#:
|
|
14
|
-
STAGING = "Staging"
|
|
15
|
-
#:
|
|
16
|
-
SCOUTING = "Scouting"
|
|
17
|
-
#:
|
|
18
|
-
WAITING = "Waiting"
|
|
19
|
-
#:
|
|
20
|
-
MATCHED = "Matched"
|
|
21
|
-
#: The Rescheduled status is effectively never stored in the DB.
|
|
22
|
-
#: It could be considered a "virtual" status, and might even be dropped.
|
|
23
|
-
RESCHEDULED = "Rescheduled"
|
|
24
|
-
#:
|
|
25
|
-
RUNNING = "Running"
|
|
26
|
-
#:
|
|
27
|
-
STALLED = "Stalled"
|
|
28
|
-
#:
|
|
29
|
-
COMPLETING = "Completing"
|
|
30
|
-
#:
|
|
31
|
-
DONE = "Done"
|
|
32
|
-
#:
|
|
33
|
-
COMPLETED = "Completed"
|
|
34
|
-
#:
|
|
35
|
-
FAILED = "Failed"
|
|
36
|
-
#:
|
|
37
|
-
DELETED = "Deleted"
|
|
38
|
-
#:
|
|
39
|
-
KILLED = "Killed"
|
|
40
|
-
|
|
41
|
-
#: Possible job states
|
|
42
|
-
JOB_STATES = [
|
|
43
|
-
SUBMITTING,
|
|
44
|
-
RECEIVED,
|
|
45
|
-
CHECKING,
|
|
46
|
-
SCOUTING,
|
|
47
|
-
STAGING,
|
|
48
|
-
WAITING,
|
|
49
|
-
MATCHED,
|
|
50
|
-
RESCHEDULED,
|
|
51
|
-
RUNNING,
|
|
52
|
-
STALLED,
|
|
53
|
-
COMPLETING,
|
|
54
|
-
DONE,
|
|
55
|
-
COMPLETED,
|
|
56
|
-
FAILED,
|
|
57
|
-
DELETED,
|
|
58
|
-
KILLED,
|
|
59
|
-
]
|
|
1
|
+
"""Backward compatibility wrapper - moved to DIRACCommon
|
|
60
2
|
|
|
61
|
-
|
|
62
|
-
|
|
3
|
+
This module has been moved to DIRACCommon.WorkloadManagementSystem.Client.JobStatus to avoid
|
|
4
|
+
circular dependencies and allow DiracX to use these utilities without
|
|
5
|
+
triggering DIRAC's global state initialization.
|
|
63
6
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
class JobsStateMachine(StateMachine):
|
|
69
|
-
"""Jobs state machine"""
|
|
70
|
-
|
|
71
|
-
def __init__(self, state):
|
|
72
|
-
"""c'tor
|
|
73
|
-
Defines the state machine transactions
|
|
74
|
-
"""
|
|
75
|
-
super().__init__(state)
|
|
76
|
-
|
|
77
|
-
# States transitions
|
|
78
|
-
self.states = {
|
|
79
|
-
DELETED: State(15), # final state
|
|
80
|
-
KILLED: State(14, [DELETED], defState=KILLED),
|
|
81
|
-
FAILED: State(13, [RESCHEDULED, DELETED], defState=FAILED),
|
|
82
|
-
DONE: State(12, [DELETED], defState=DONE),
|
|
83
|
-
COMPLETED: State(11, [DONE, FAILED], defState=COMPLETED),
|
|
84
|
-
COMPLETING: State(10, [DONE, FAILED, COMPLETED, STALLED, KILLED], defState=COMPLETING),
|
|
85
|
-
STALLED: State(9, [RUNNING, FAILED, KILLED], defState=STALLED),
|
|
86
|
-
RUNNING: State(8, [STALLED, DONE, FAILED, RESCHEDULED, COMPLETING, KILLED, RECEIVED], defState=RUNNING),
|
|
87
|
-
RESCHEDULED: State(7, [WAITING, RECEIVED, DELETED, FAILED, KILLED], defState=RESCHEDULED),
|
|
88
|
-
MATCHED: State(6, [RUNNING, FAILED, RESCHEDULED, KILLED], defState=MATCHED),
|
|
89
|
-
WAITING: State(5, [MATCHED, RESCHEDULED, DELETED, KILLED], defState=WAITING),
|
|
90
|
-
STAGING: State(4, [CHECKING, WAITING, FAILED, KILLED], defState=STAGING),
|
|
91
|
-
SCOUTING: State(3, [CHECKING, FAILED, STALLED, KILLED], defState=SCOUTING),
|
|
92
|
-
CHECKING: State(2, [SCOUTING, STAGING, WAITING, RESCHEDULED, FAILED, DELETED, KILLED], defState=CHECKING),
|
|
93
|
-
RECEIVED: State(1, [SCOUTING, CHECKING, STAGING, WAITING, FAILED, DELETED, KILLED], defState=RECEIVED),
|
|
94
|
-
SUBMITTING: State(0, [RECEIVED, CHECKING, DELETED, KILLED], defState=SUBMITTING), # initial state
|
|
95
|
-
}
|
|
7
|
+
All exports are maintained for backward compatibility.
|
|
8
|
+
"""
|
|
9
|
+
# Re-export everything from DIRACCommon for backward compatibility
|
|
10
|
+
from DIRACCommon.WorkloadManagementSystem.Client.JobStatus import * # noqa: F401, F403
|
|
@@ -1,162 +1,33 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
import
|
|
3
|
+
# Import stateless functions from DIRACCommon for backward compatibility
|
|
4
|
+
from DIRACCommon.WorkloadManagementSystem.DB.JobDBUtils import *
|
|
5
5
|
|
|
6
6
|
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
|
|
7
|
-
from DIRAC.Core.Utilities.DErrno import EWMSSUBM
|
|
8
7
|
from DIRAC.Core.Utilities.ObjectLoader import ObjectLoader
|
|
9
|
-
from DIRAC.Core.Utilities.ReturnValues import
|
|
10
|
-
from DIRAC.WorkloadManagementSystem.Client import
|
|
11
|
-
from DIRAC.WorkloadManagementSystem.Client.JobState.JobManifest import JobManifest
|
|
8
|
+
from DIRAC.Core.Utilities.ReturnValues import returnValueOrRaise
|
|
9
|
+
from DIRAC.WorkloadManagementSystem.Client.JobState.JobManifest import makeJobManifestConfig
|
|
12
10
|
|
|
13
11
|
getDIRACPlatform = returnValueOrRaise(
|
|
14
12
|
ObjectLoader().loadObject("ConfigurationSystem.Client.Helpers.Resources", "getDIRACPlatform")
|
|
15
13
|
)
|
|
16
14
|
|
|
17
15
|
|
|
18
|
-
def
|
|
19
|
-
|
|
20
|
-
|
|
16
|
+
def checkAndPrepareJob(
|
|
17
|
+
jobID, classAdJob, classAdReq, owner, ownerGroup, jobAttrs, vo
|
|
18
|
+
): # pylint: disable=function-redefined
|
|
19
|
+
from DIRACCommon.WorkloadManagementSystem.DB.JobDBUtils import checkAndPrepareJob
|
|
21
20
|
|
|
21
|
+
config = {
|
|
22
|
+
"inputDataPolicyForVO": Operations(vo=vo).getValue("InputDataPolicy/InputDataModule"),
|
|
23
|
+
"softwareDistModuleForVO": Operations(vo=vo).getValue("SoftwareDistModule"),
|
|
24
|
+
"defaultCPUTimeForOwnerGroup": Operations(group=ownerGroup).getValue("JobDescription/DefaultCPUTime", 86400),
|
|
25
|
+
"getDIRACPlatform": getDIRACPlatform,
|
|
26
|
+
}
|
|
27
|
+
return checkAndPrepareJob(jobID, classAdJob, classAdReq, owner, ownerGroup, jobAttrs, vo, config=config)
|
|
22
28
|
|
|
23
|
-
def extractJDL(compressedJDL):
|
|
24
|
-
"""Return decompressed JDL string."""
|
|
25
|
-
# the starting bracket is guaranteeed by JobManager.submitJob
|
|
26
|
-
# we need the check to be backward compatible
|
|
27
|
-
if isinstance(compressedJDL, bytes):
|
|
28
|
-
if compressedJDL.startswith(b"["):
|
|
29
|
-
return compressedJDL.decode()
|
|
30
|
-
else:
|
|
31
|
-
if compressedJDL.startswith("["):
|
|
32
|
-
return compressedJDL
|
|
33
|
-
return zlib.decompress(base64.b64decode(compressedJDL)).decode()
|
|
34
29
|
|
|
30
|
+
def checkAndAddOwner(jdl: str, owner: str, ownerGroup: str): # pylint: disable=function-redefined
|
|
31
|
+
from DIRACCommon.WorkloadManagementSystem.DB.JobDBUtils import checkAndAddOwner
|
|
35
32
|
|
|
36
|
-
|
|
37
|
-
jobManifest = JobManifest()
|
|
38
|
-
res = jobManifest.load(jdl)
|
|
39
|
-
if not res["OK"]:
|
|
40
|
-
return res
|
|
41
|
-
|
|
42
|
-
jobManifest.setOptionsFromDict({"Owner": owner, "OwnerGroup": ownerGroup})
|
|
43
|
-
res = jobManifest.check()
|
|
44
|
-
if not res["OK"]:
|
|
45
|
-
return res
|
|
46
|
-
|
|
47
|
-
return S_OK(jobManifest)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def fixJDL(jdl: str) -> str:
|
|
51
|
-
# 1.- insert original JDL on DB and get new JobID
|
|
52
|
-
# Fix the possible lack of the brackets in the JDL
|
|
53
|
-
if jdl.strip()[0].find("[") != 0:
|
|
54
|
-
jdl = "[" + jdl + "]"
|
|
55
|
-
return jdl
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def checkAndPrepareJob(jobID, classAdJob, classAdReq, owner, ownerGroup, jobAttrs, vo):
|
|
59
|
-
error = ""
|
|
60
|
-
|
|
61
|
-
jdlOwner = classAdJob.getAttributeString("Owner")
|
|
62
|
-
jdlOwnerGroup = classAdJob.getAttributeString("OwnerGroup")
|
|
63
|
-
jdlVO = classAdJob.getAttributeString("VirtualOrganization")
|
|
64
|
-
|
|
65
|
-
# The below is commented out since this is always overwritten by the submitter IDs
|
|
66
|
-
# but the check allows to findout inconsistent client environments
|
|
67
|
-
if jdlOwner and jdlOwner != owner:
|
|
68
|
-
error = "Wrong Owner in JDL"
|
|
69
|
-
elif jdlOwnerGroup and jdlOwnerGroup != ownerGroup:
|
|
70
|
-
error = "Wrong Owner Group in JDL"
|
|
71
|
-
elif jdlVO and jdlVO != vo:
|
|
72
|
-
error = "Wrong Virtual Organization in JDL"
|
|
73
|
-
|
|
74
|
-
classAdJob.insertAttributeString("Owner", owner)
|
|
75
|
-
classAdJob.insertAttributeString("OwnerGroup", ownerGroup)
|
|
76
|
-
|
|
77
|
-
if vo:
|
|
78
|
-
classAdJob.insertAttributeString("VirtualOrganization", vo)
|
|
79
|
-
|
|
80
|
-
classAdReq.insertAttributeString("Owner", owner)
|
|
81
|
-
classAdReq.insertAttributeString("OwnerGroup", ownerGroup)
|
|
82
|
-
if vo:
|
|
83
|
-
classAdReq.insertAttributeString("VirtualOrganization", vo)
|
|
84
|
-
|
|
85
|
-
inputDataPolicy = Operations(vo=vo).getValue("InputDataPolicy/InputDataModule")
|
|
86
|
-
if inputDataPolicy and not classAdJob.lookupAttribute("InputDataModule"):
|
|
87
|
-
classAdJob.insertAttributeString("InputDataModule", inputDataPolicy)
|
|
88
|
-
|
|
89
|
-
softwareDistModule = Operations(vo=vo).getValue("SoftwareDistModule")
|
|
90
|
-
if softwareDistModule and not classAdJob.lookupAttribute("SoftwareDistModule"):
|
|
91
|
-
classAdJob.insertAttributeString("SoftwareDistModule", softwareDistModule)
|
|
92
|
-
|
|
93
|
-
# priority
|
|
94
|
-
priority = classAdJob.getAttributeInt("Priority")
|
|
95
|
-
if priority is None:
|
|
96
|
-
priority = 0
|
|
97
|
-
classAdReq.insertAttributeInt("UserPriority", priority)
|
|
98
|
-
|
|
99
|
-
# CPU time
|
|
100
|
-
cpuTime = classAdJob.getAttributeInt("CPUTime")
|
|
101
|
-
if cpuTime is None:
|
|
102
|
-
opsHelper = Operations(group=ownerGroup)
|
|
103
|
-
cpuTime = opsHelper.getValue("JobDescription/DefaultCPUTime", 86400)
|
|
104
|
-
classAdReq.insertAttributeInt("CPUTime", cpuTime)
|
|
105
|
-
|
|
106
|
-
# platform(s)
|
|
107
|
-
platformList = classAdJob.getListFromExpression("Platform")
|
|
108
|
-
if platformList:
|
|
109
|
-
result = getDIRACPlatform(platformList)
|
|
110
|
-
if not result["OK"]:
|
|
111
|
-
return result
|
|
112
|
-
if result["Value"]:
|
|
113
|
-
classAdReq.insertAttributeVectorString("Platforms", result["Value"])
|
|
114
|
-
else:
|
|
115
|
-
error = "OS compatibility info not found"
|
|
116
|
-
if error:
|
|
117
|
-
retVal = S_ERROR(EWMSSUBM, error)
|
|
118
|
-
retVal["JobId"] = jobID
|
|
119
|
-
retVal["Status"] = JobStatus.FAILED
|
|
120
|
-
retVal["MinorStatus"] = error
|
|
121
|
-
|
|
122
|
-
jobAttrs["Status"] = JobStatus.FAILED
|
|
123
|
-
|
|
124
|
-
jobAttrs["MinorStatus"] = error
|
|
125
|
-
return retVal
|
|
126
|
-
return S_OK()
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
def createJDLWithInitialStatus(
|
|
130
|
-
classAdJob, classAdReq, jdl2DBParameters, jobAttrs, initialStatus, initialMinorStatus, *, modern=False
|
|
131
|
-
):
|
|
132
|
-
"""
|
|
133
|
-
:param modern: if True, store boolean instead of string for VerifiedFlag (used by diracx only)
|
|
134
|
-
"""
|
|
135
|
-
priority = classAdJob.getAttributeInt("Priority")
|
|
136
|
-
if priority is None:
|
|
137
|
-
priority = 0
|
|
138
|
-
jobAttrs["UserPriority"] = priority
|
|
139
|
-
|
|
140
|
-
for jdlName in jdl2DBParameters:
|
|
141
|
-
# Defaults are set by the DB.
|
|
142
|
-
jdlValue = classAdJob.getAttributeString(jdlName)
|
|
143
|
-
if jdlValue:
|
|
144
|
-
jobAttrs[jdlName] = jdlValue
|
|
145
|
-
|
|
146
|
-
jdlValue = classAdJob.getAttributeString("Site")
|
|
147
|
-
if jdlValue:
|
|
148
|
-
if jdlValue.find(",") != -1:
|
|
149
|
-
jobAttrs["Site"] = "Multiple"
|
|
150
|
-
else:
|
|
151
|
-
jobAttrs["Site"] = jdlValue
|
|
152
|
-
|
|
153
|
-
jobAttrs["VerifiedFlag"] = True if modern else "True"
|
|
154
|
-
|
|
155
|
-
jobAttrs["Status"] = initialStatus
|
|
156
|
-
|
|
157
|
-
jobAttrs["MinorStatus"] = initialMinorStatus
|
|
158
|
-
|
|
159
|
-
reqJDL = classAdReq.asJDL()
|
|
160
|
-
classAdJob.insertAttributeInt("JobRequirements", reqJDL)
|
|
161
|
-
|
|
162
|
-
return classAdJob.asJDL()
|
|
33
|
+
return checkAndAddOwner(jdl, owner, ownerGroup, job_manifest_config=makeJobManifestConfig(ownerGroup))
|
|
@@ -55,6 +55,8 @@ from DIRAC.WorkloadManagementSystem.Client.JobStateUpdateClient import JobStateU
|
|
|
55
55
|
from DIRAC.WorkloadManagementSystem.Client.SandboxStoreClient import SandboxStoreClient
|
|
56
56
|
from DIRAC.WorkloadManagementSystem.JobWrapper.Watchdog import Watchdog
|
|
57
57
|
|
|
58
|
+
CHILD_PID_POLL_INTERVALS = list(range(5, 40, 5))
|
|
59
|
+
|
|
58
60
|
|
|
59
61
|
class JobWrapper:
|
|
60
62
|
"""The only user of the JobWrapper is the JobWrapperTemplate"""
|
|
@@ -430,14 +432,14 @@ class JobWrapper:
|
|
|
430
432
|
)
|
|
431
433
|
exeThread.start()
|
|
432
434
|
payloadPID = None
|
|
433
|
-
for seconds in range(5, 40, 5):
|
|
435
|
+
for seconds in CHILD_PID_POLL_INTERVALS:
|
|
434
436
|
time.sleep(seconds)
|
|
435
437
|
payloadPID = spObject.getChildPID()
|
|
436
438
|
if payloadPID:
|
|
437
439
|
self.__setJobParam("PayloadPID", payloadPID)
|
|
438
440
|
break
|
|
439
441
|
if not payloadPID:
|
|
440
|
-
return S_ERROR("Payload process could not start after 140 seconds")
|
|
442
|
+
return S_ERROR(f"Payload process could not start after {sum(CHILD_PID_POLL_INTERVALS)} seconds")
|
|
441
443
|
|
|
442
444
|
watchdog = Watchdog(
|
|
443
445
|
pid=self.currentPID,
|
|
@@ -344,24 +344,40 @@ def test_processQuickExecutionNoWatchdog(mocker):
|
|
|
344
344
|
|
|
345
345
|
|
|
346
346
|
@pytest.mark.slow
|
|
347
|
-
|
|
348
|
-
|
|
347
|
+
@pytest.mark.parametrize("expect_failure", [True, False])
|
|
348
|
+
def test_processSubprocessFailureNoPid(mocker, monkeypatch, expect_failure):
|
|
349
|
+
"""Test the process method of the JobWrapper class: the subprocess fails and no PID is returned.
|
|
350
|
+
|
|
351
|
+
expect_failure is used to ensure that the JobWrapper is functioning correctly even with the other patching
|
|
352
|
+
that is applied in the test (e.g. CHILD_PID_POLL_INTERVALS).
|
|
353
|
+
"""
|
|
349
354
|
# Test failure in starting the payload process
|
|
350
355
|
jw = JobWrapper()
|
|
351
356
|
jw.jobArgs = {}
|
|
352
357
|
|
|
353
358
|
mocker.patch.object(jw, "_JobWrapper__report")
|
|
354
359
|
mocker.patch.object(jw, "_JobWrapper__setJobParam")
|
|
360
|
+
monkeypatch.setattr(
|
|
361
|
+
"DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapper.CHILD_PID_POLL_INTERVALS", [0.1, 0.2, 0.3, 0.4, 0.5]
|
|
362
|
+
)
|
|
363
|
+
|
|
355
364
|
mock_exeThread = mocker.Mock()
|
|
356
365
|
mock_exeThread.start.side_effect = lambda: time.sleep(0.1)
|
|
357
|
-
|
|
366
|
+
if expect_failure:
|
|
367
|
+
mocker.patch(
|
|
368
|
+
"DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapper.ExecutionThread", return_value=mock_exeThread
|
|
369
|
+
)
|
|
358
370
|
|
|
359
371
|
with tempfile.NamedTemporaryFile(delete=True) as std_out, tempfile.NamedTemporaryFile(delete=True) as std_err:
|
|
360
372
|
jw.outputFile = std_out.name
|
|
361
373
|
jw.errorFile = std_err.name
|
|
362
374
|
result = jw.process(command="mock_command", env={})
|
|
363
|
-
|
|
364
|
-
|
|
375
|
+
|
|
376
|
+
if expect_failure:
|
|
377
|
+
assert not result["OK"]
|
|
378
|
+
assert "Payload process could not start after 1.5 seconds" in result["Message"]
|
|
379
|
+
else:
|
|
380
|
+
assert result["OK"]
|
|
365
381
|
|
|
366
382
|
|
|
367
383
|
# -------------------------------------------------------------------------------------------------
|
|
@@ -72,6 +72,7 @@ def extraOptions():
|
|
|
72
72
|
os.remove(extraOptions)
|
|
73
73
|
|
|
74
74
|
|
|
75
|
+
@pytest.mark.slow
|
|
75
76
|
def test_createAndExecuteJobWrapperTemplate_success(extraOptions):
|
|
76
77
|
"""Test the creation of a classical job wrapper and its execution:
|
|
77
78
|
There is an extra option cfg file to be passed to the job wrapper.
|
|
@@ -144,6 +145,7 @@ def test_createAndExecuteJobWrapperTemplate_success(extraOptions):
|
|
|
144
145
|
shutil.rmtree(os.path.join(os.getcwd(), "job"))
|
|
145
146
|
|
|
146
147
|
|
|
148
|
+
@pytest.mark.slow
|
|
147
149
|
def test_createAndExecuteJobWrapperTemplate_missingExtraOptions():
|
|
148
150
|
"""Test the creation of a classical job wrapper and its execution:
|
|
149
151
|
There is no extra options to be passed to the job wrapper.
|
|
@@ -205,6 +207,7 @@ def test_createAndExecuteJobWrapperTemplate_missingExtraOptions():
|
|
|
205
207
|
shutil.rmtree(os.path.join(os.getcwd(), "job"))
|
|
206
208
|
|
|
207
209
|
|
|
210
|
+
@pytest.mark.slow
|
|
208
211
|
def test_createAndExecuteRelocatedJobWrapperTemplate_success(extraOptions):
|
|
209
212
|
"""Test the creation of a relocated job wrapper and its execution:
|
|
210
213
|
This is generally used when containers are involved (SingularityCE).
|
|
@@ -325,6 +328,7 @@ def test_createAndExecuteRelocatedJobWrapperTemplate_success(extraOptions):
|
|
|
325
328
|
shutil.rmtree(wrapperPath)
|
|
326
329
|
|
|
327
330
|
|
|
331
|
+
@pytest.mark.slow
|
|
328
332
|
def test_createAndExecuteJobWrapperOfflineTemplate_success(extraOptions):
|
|
329
333
|
"""Test the creation of an offline job wrapper and its execution:
|
|
330
334
|
This is generally used when pre/post processing operations are executed locally,
|