DIRAC 9.0.13__py3-none-any.whl → 9.0.15__py3-none-any.whl
- DIRAC/ConfigurationSystem/Client/CSAPI.py +11 -0
- DIRAC/Core/Utilities/CGroups2.py +1 -0
- DIRAC/Core/Utilities/ElasticSearchDB.py +1 -1
- DIRAC/Core/Utilities/MySQL.py +51 -25
- DIRAC/DataManagementSystem/Client/DataManager.py +7 -10
- DIRAC/DataManagementSystem/Client/FTS3Job.py +12 -3
- DIRAC/FrameworkSystem/Service/SystemAdministratorHandler.py +41 -11
- DIRAC/Interfaces/API/Dirac.py +12 -4
- DIRAC/Interfaces/API/Job.py +62 -17
- DIRAC/RequestManagementSystem/private/RequestTask.py +2 -1
- DIRAC/Resources/Catalog/FileCatalogClient.py +18 -7
- DIRAC/Resources/Catalog/Utilities.py +3 -3
- DIRAC/Resources/Computing/BatchSystems/SLURM.py +1 -1
- DIRAC/Resources/Computing/BatchSystems/TimeLeft/TimeLeft.py +3 -1
- DIRAC/Resources/Computing/ComputingElement.py +39 -34
- DIRAC/Resources/Computing/InProcessComputingElement.py +20 -7
- DIRAC/Resources/Computing/PoolComputingElement.py +76 -37
- DIRAC/Resources/Computing/SingularityComputingElement.py +19 -9
- DIRAC/Resources/Computing/test/Test_InProcessComputingElement.py +69 -8
- DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +102 -35
- DIRAC/Resources/Storage/GFAL2_StorageBase.py +9 -0
- DIRAC/TransformationSystem/Agent/TransformationAgent.py +12 -13
- DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py +1 -1
- DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py +4 -3
- DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py +1 -1
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py +4 -3
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PilotLoggingAgent.py +3 -3
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PilotStatusAgent.py +4 -2
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py +5 -4
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py +4 -2
- DIRAC/WorkloadManagementSystem/Client/JobReport.py +10 -6
- DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py +12 -3
- DIRAC/WorkloadManagementSystem/Client/Matcher.py +18 -24
- DIRAC/WorkloadManagementSystem/DB/TaskQueueDB.py +137 -7
- DIRAC/WorkloadManagementSystem/Executor/JobScheduling.py +8 -14
- DIRAC/WorkloadManagementSystem/Executor/test/Test_Executor.py +3 -5
- DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +4 -5
- DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperOfflineTemplate.py +1 -1
- DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py +1 -2
- DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +1 -1
- DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +81 -2
- DIRAC/WorkloadManagementSystem/Utilities/QueueUtilities.py +5 -5
- DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +2 -1
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_RemoteRunner.py +7 -3
- DIRAC/WorkloadManagementSystem/scripts/dirac_wms_get_wn_parameters.py +3 -3
- DIRAC/__init__.py +1 -1
- DIRAC/tests/Utilities/testJobDefinitions.py +57 -20
- {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/METADATA +2 -2
- {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/RECORD +53 -53
- {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/WHEEL +0 -0
- {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/entry_points.txt +0 -0
- {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/licenses/LICENSE +0 -0
- {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/top_level.txt +0 -0

DIRAC/Resources/Computing/test/Test_PoolComputingElement.py

@@ -2,6 +2,7 @@
 """
 tests for PoolComputingElement module
 """
+
 import os
 import time
 
@@ -50,7 +51,7 @@ def _stopJob(nJob):
 
 @pytest.fixture
 def createAndDelete():
-    for i in range(
+    for i in range(9):
         with open(f"testPoolCEJob_{i}.py", "w") as execFile:
             execFile.write(jobScript % i)
         os.chmod(f"testPoolCEJob_{i}.py", 0o755)
@@ -66,24 +67,28 @@ def createAndDelete():
     time.sleep(0.5)
 
     # stopping the jobs
-    for i in range(
+    for i in range(9):
         _stopJob(i)
 
     # removing testPoolCEJob files
    # this will also stop the futures unless they are already stopped!
-    for i in range(
+    for i in range(9):
         try:
             os.remove(f"testPoolCEJob_{i}.py")
-            os.remove("testBadPoolCEJob.py")
         except OSError:
             pass
 
+    try:
+        os.remove("testBadPoolCEJob.py")
+    except OSError:
+        pass
+
 
 @pytest.mark.slow
 def test_submit_and_shutdown(createAndDelete):
     time.sleep(0.5)
 
-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4}
+    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4, "MaxRAM": 3800}
     ce = PoolComputingElement("TestPoolCE")
     ce.setParameters(ceParameters)
 
@@ -145,7 +150,7 @@ def test_executeJob_wholeNode4(createAndDelete):
     time.sleep(0.5)
     taskIDs = {}
 
-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4}
+    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4, "MaxRAM": 16000}
     ce = PoolComputingElement("TestPoolCE")
     ce.setParameters(ceParameters)
 
@@ -159,9 +164,11 @@ def test_executeJob_wholeNode4(createAndDelete):
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 1
     assert result["AvailableProcessors"] == 3
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == 16000
     assert result["RunningJobs"] == 1
 
-    jobParams = {"mpTag": True, "numberOfProcessors": 2}
+    jobParams = {"mpTag": True, "numberOfProcessors": 2, "MaxRAM": 4000}
     result = ce.submitJob("testPoolCEJob_1.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
@@ -171,6 +178,9 @@ def test_executeJob_wholeNode4(createAndDelete):
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 3
     assert result["AvailableProcessors"] == 1
+    assert result["UsedRAM"] == 4000
+    assert result["AvailableRAM"] == 12000
+
     assert result["RunningJobs"] == 2
 
     # now trying again would fail
@@ -190,13 +200,20 @@ def test_executeJob_wholeNode4(createAndDelete):
 
 
 @pytest.mark.slow
-def test_executeJob_wholeNode8(createAndDelete):
+@pytest.mark.parametrize(
+    "ce_parameters",
+    [
+        ({"NumberOfProcessors": 8}),
+        ({"NumberOfProcessors": 8, "MaxRAM": 32000}),
+        ({"WholeNode": True, "NumberOfProcessors": 8, "MaxRAM": 32000}),
+    ],
+)
+def test_executeJob_wholeNode8(createAndDelete, ce_parameters):
     time.sleep(0.5)
     taskIDs = {}
 
-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 8}
     ce = PoolComputingElement("TestPoolCE")
-    ce.setParameters(ceParameters)
+    ce.setParameters(ce_parameters)
 
     jobParams = {"mpTag": True, "numberOfProcessors": 2, "maxNumberOfProcessors": 2}
     result = ce.submitJob("testPoolCEJob_2.py", None, **jobParams)
@@ -207,6 +224,8 @@ def test_executeJob_wholeNode8(createAndDelete):
 
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 2
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == ce_parameters.get("MaxRAM", 0)
 
     jobParams = {"mpTag": True, "numberOfProcessors": 1, "maxNumberOfProcessors": 3}
     result = ce.submitJob("testPoolCEJob_3.py", None, **jobParams)
@@ -217,8 +236,10 @@ def test_executeJob_wholeNode8(createAndDelete):
 
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 5
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == ce_parameters.get("MaxRAM", 0)
 
-    jobParams = {"numberOfProcessors": 2}  # This is same as asking for SP
+    jobParams = {"numberOfProcessors": 2, "MinRAM": 4000, "MaxRAM": 8000}  # This is same as asking for SP
     result = ce.submitJob("testPoolCEJob_4.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
@@ -227,39 +248,72 @@ def test_executeJob_wholeNode8(createAndDelete):
 
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 6
+    assert result["UsedRAM"] == 8000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )
 
-    #
-    jobParams = {"mpTag": True, "numberOfProcessors": 3}
+    jobParams = {"MinRAM": 8000, "MaxRAM": 8000}  # This is same as asking for SP
     result = ce.submitJob("testPoolCEJob_5.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
     assert taskID == 3
+    taskIDs[taskID] = True
+
+    result = ce.getCEStatus()
+    assert result["UsedProcessors"] == 7
+    assert result["UsedRAM"] == 16000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )
+
+    jobParams = {"MaxRAM": 24000}  # This will fail for the case when the ce have set a RAM
+    result = ce.submitJob("testPoolCEJob_6.py", None, **jobParams)
+    assert result["OK"] is True
+    taskID = result["Value"]
+    assert taskID == 4
+    if ce_parameters.get("MaxRAM"):
+        assert ce.taskResults[taskID]["OK"] is False
+
+    result = ce.getCEStatus()
+    assert result["UsedProcessors"] == 7 if ce_parameters.get("MaxRAM") else 8
+    assert result["UsedRAM"] == 16000 if ce_parameters.get("MaxRAM") else 40000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )
+
+    # now trying again would fail
+    jobParams = {"mpTag": True, "numberOfProcessors": 3}
+    result = ce.submitJob("testPoolCEJob_7.py", None, **jobParams)
+    assert result["OK"] is True
+    taskID = result["Value"]
+    assert taskID == 5
     taskIDs[taskID] = False
 
     # waiting and submit again
     while len(ce.taskResults) < 2:
         time.sleep(0.1)
 
-    jobParams = {"mpTag": True, "numberOfProcessors":
-    result = ce.submitJob("
+    jobParams = {"mpTag": True, "numberOfProcessors": 1}
+    result = ce.submitJob("testPoolCEJob_8.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
-    assert taskID ==
+    assert taskID == 6
     taskIDs[taskID] = True
 
     result = ce.shutdown()
     assert result["OK"] is True
     assert isinstance(result["Value"], dict)
-    assert len(result["Value"]) ==
+    assert len(result["Value"]) == 7
 
-    while len(ce.taskResults) <
+    while len(ce.taskResults) < 7:
         time.sleep(0.1)
 
     for taskID, expectedResult in taskIDs.items():
         submissionResult = ce.taskResults[taskID]
         assert submissionResult["OK"] is expectedResult
         if not submissionResult["OK"]:
-            assert "Not enough processors"
+            assert submissionResult["Message"] in ["Not enough processors for the job", "Not enough memory for the job"]
 
@@ -372,28 +426,41 @@ def test_executeJob_WholeNodeJobs(createAndDelete):
 
 
 @pytest.mark.parametrize(
-    "processorsPerTask, kwargs,
+    "processorsPerTask, ramPerTask, kwargs, expected_processors, expected_memory",
     [
-        (None, {}, 1),
-        (None, {"mpTag": False}, 1),
-        (None, {"mpTag": True}, 1),
-        (None, {"mpTag": True, "wholeNode": True}, 16),
-        (None, {"mpTag": True, "wholeNode": False}, 1),
-        (None, {"mpTag": True, "numberOfProcessors": 4}, 4),
-        (None, {"mpTag": True, "numberOfProcessors": 4, "
-        (None, {"mpTag": True, "numberOfProcessors": 4, "
-        (
-        (
-        (
-        ({1: 4}, {"mpTag": True, "
-        ({1: 4}, {"mpTag": True, "
+        (None, None, {}, 1, 0),
+        (None, None, {"mpTag": False}, 1, 0),
+        (None, None, {"mpTag": True, "MaxRAM": 8000}, 1, 8000),
+        (None, None, {"mpTag": True, "wholeNode": True}, 16, 0),
+        (None, None, {"mpTag": True, "wholeNode": False}, 1, 0),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MinRAM": 2000}, 4, 2000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MaxRAM": 4000}, 4, 4000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MaxRAM": 36000}, 4, None),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MinRAM": 2000, "MaxRAM": 4000}, 4, 4000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "maxNumberOfProcessors": 8}, 8, 0),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "maxNumberOfProcessors": 32}, 16, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "wholeNode": True}, 0, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "wholeNode": False}, 1, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2, "MinRAM": 8000}, 2, 8000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 16, "MinRAM": 8000, "MaxRAM": 12000}, 0, 12000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "maxNumberOfProcessors": 2, "MaxRAM": 16000}, 2, 16000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2, "MaxRAM": 8000}, 2, 8000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "maxNumberOfProcessors": 16, "MaxRAM": 32000}, 12, None),
+        ({1: 4, 2: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2}, 2, 0),
+        ({1: 4, 2: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 4}, 4, 0),
+        ({1: 4, 2: 8, 3: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 4}, 0, 0),
     ],
 )
-def
+def test__getLimitsForJobs(processorsPerTask, ramPerTask, kwargs, expected_processors, expected_memory):
     ce = PoolComputingElement("TestPoolCE")
     ce.processors = 16
+    ce.ram = 32000
 
     if processorsPerTask:
         ce.processorsPerTask = processorsPerTask
+    if ramPerTask:
+        ce.ramPerTask = ramPerTask
     res = ce._getProcessorsForJobs(kwargs)
-    assert res ==
+    assert res == expected_processors
+    res = ce._getMemoryForJobs(kwargs)
+    assert res == expected_memory

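The assertions above pin down the memory bookkeeping that PoolComputingElement gains in this release: a CE-level MaxRAM budget, per-job MinRAM/MaxRAM requests, and UsedRAM/AvailableRAM counters that report 0 when no budget is configured. Below is a minimal standalone sketch of that accounting model, an illustration of the semantics the tests assert; the class and attribute names are assumptions, not the actual DIRAC implementation.

    # Illustrative sketch of the RAM accounting the tests above assert.
    # Names are hypothetical, not DIRAC code.
    class MemoryBudget:
        def __init__(self, max_ram=0):
            self.max_ram = max_ram  # MB; 0 means no CE-level RAM limit configured
            self.reserved = {}  # taskID -> MB reserved for that job

        @property
        def used(self):
            return sum(self.reserved.values())

        @property
        def available(self):
            # Tests expect AvailableRAM == 0 when no MaxRAM is configured
            return self.max_ram - self.used if self.max_ram else 0

        def fits(self, job_max_ram):
            # Without a CE budget, RAM requests are not enforced
            return not self.max_ram or job_max_ram <= self.available

Under this model the testPoolCEJob_6 submission (MaxRAM=24000 against 16000 available) fails only for the parametrizations that set a CE-level MaxRAM, which is exactly what the `ce.taskResults[taskID]["OK"] is False` branch checks.
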
DIRAC/Resources/Storage/GFAL2_StorageBase.py

@@ -53,6 +53,9 @@ except AttributeError:
 MAX_SINGLE_STREAM_SIZE = 1024 * 1024 * 10  # 10MB
 MIN_BANDWIDTH = 0.5 * (1024 * 1024)  # 0.5 MB/s
 
+# Default timeout for any stat like call
+DEFAULT_OPERATION_TIMEOUT = 10
+
 
 @contextmanager
 def setGfalSetting(
@@ -169,6 +172,12 @@ class GFAL2_StorageBase(StorageBase):
         # It is only useful for TPC
         self.ctx.set_opt_boolean("HTTP PLUGIN", "RETRIEVE_BEARER_TOKEN", False)
 
+        # Set a global timeout for the operations
+        self.ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", DEFAULT_OPERATION_TIMEOUT)
+        # Because HTTP Plugin does not read the CORE:NAMESPACE_TIMEOUT as it should
+        # I also specify it here
+        self.ctx.set_opt_integer("HTTP PLUGIN", "OPERATION_TIMEOUT", DEFAULT_OPERATION_TIMEOUT)
+
         # spaceToken used for copying from and to the storage element
         self.spaceToken = parameters.get("SpaceToken", "")
         # stageTimeout, default timeout to try and stage/pin a file

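For reference, these `set_opt_*` calls come from the gfal2 Python bindings, where options are grouped per plugin section. A minimal sketch of the same pattern outside DIRAC, assuming the gfal2-python package is installed:

    import gfal2

    ctx = gfal2.creat_context()  # gfal2's spelling: "creat", not "create"
    # Mirror the diff above: cap stat-like namespace calls at 10 seconds, and
    # repeat the setting for the HTTP plugin, which reads its own timeout option.
    ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", 10)
    ctx.set_opt_integer("HTTP PLUGIN", "OPERATION_TIMEOUT", 10)
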
DIRAC/TransformationSystem/Agent/TransformationAgent.py

@@ -500,19 +500,18 @@ class TransformationAgent(AgentModule, TransformationAgentsUtilities):
         startTime = time.time()
         self._logInfo(f"Getting replicas for {len(newLFNs)} files from catalog", method=method, transID=transID)
         newReplicas = {}
-
-
-
-
-
-
-
-
-
-
-
-
-            )
+        res = self._getDataReplicasDM(transID, newLFNs, clients, forJobs=forJobs)
+        if res["OK"]:
+            newReplicas = {lfn: ses for lfn, ses in res["Value"].items() if ses}
+
+            self.__updateCache(transID, newReplicas)
+        else:
+            self._logWarn(
+                f"Failed to get replicas for {len(newLFNs)} files",
+                res["Message"],
+                method=method,
+                transID=transID,
+            )
 
         self._logInfo(
             f"Obtained {len(newReplicas)} replicas from catalog in {time.time() - startTime:.1f} seconds",

DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py

@@ -38,8 +38,8 @@ from DIRAC.WorkloadManagementSystem.Client import JobStatus
 from DIRAC.WorkloadManagementSystem.Client.WMSClient import WMSClient
 from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
 from DIRAC.WorkloadManagementSystem.DB.SandboxMetadataDB import SandboxMetadataDB
-from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_DELETE
 from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
+from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_DELETE
 from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters
 
 

DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py

@@ -8,15 +8,16 @@
 
 """
 
-import
+import hashlib
 import json
+import os
 import shutil
-
+
 import requests
 
 from DIRAC import S_OK
 from DIRAC.Core.Base.AgentModule import AgentModule
-from DIRAC.Core.Security.Locations import
+from DIRAC.Core.Security.Locations import getCAsLocation, getHostCertificateAndKeyLocation
 from DIRAC.DataManagementSystem.Client.DataManager import DataManager
 from DIRAC.WorkloadManagementSystem.Utilities.PilotCStoJSONSynchronizer import PilotCStoJSONSynchronizer
 

DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py

@@ -20,8 +20,8 @@ from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
 from DIRAC.Core.Utilities.ObjectLoader import ObjectLoader
 from DIRAC.Core.Utilities.TimeUtilities import fromString, second, toEpoch
 from DIRAC.WorkloadManagementSystem.Client import JobMinorStatus, JobStatus
-from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_KILL
 from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
+from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_KILL
 from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters
 from DIRAC.WorkloadManagementSystem.Utilities.Utils import rescheduleJobs
 

DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py

@@ -2,14 +2,15 @@
 """
 import multiprocessing
 import os
-from pathlib import Path
-import pytest
 import time
 from concurrent.futures import ProcessPoolExecutor
 from functools import partial
+from pathlib import Path
 
-
+import pytest
 from DIRAC.Core.Security.X509Chain import X509Chain  # pylint: disable=import-error
+
+from DIRAC import S_ERROR, S_OK, gLogger
 from DIRAC.Resources.Computing.BatchSystems.TimeLeft.TimeLeft import TimeLeft
 from DIRAC.Resources.Computing.ComputingElementFactory import ComputingElementFactory
 from DIRAC.Resources.Computing.test.Test_PoolComputingElement import badJobScript, jobScript

DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PilotLoggingAgent.py

@@ -1,16 +1,16 @@
 """ Test class for PilotLoggingAgent Agent
 """
 import os
-import time
 import tempfile
+import time
+from unittest.mock import MagicMock, patch
 
 import pytest
-from unittest.mock import MagicMock, patch
 
 # DIRAC Components
 import DIRAC.WorkloadManagementSystem.Agent.PilotLoggingAgent as plaModule
+from DIRAC import S_ERROR, S_OK, gConfig, gLogger
 from DIRAC.WorkloadManagementSystem.Agent.PilotLoggingAgent import PilotLoggingAgent
-from DIRAC import gLogger, gConfig, S_OK, S_ERROR
 
 gLogger.setLevel("DEBUG")
 

DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PilotStatusAgent.py

@@ -1,11 +1,13 @@
 """ Test class for Pilot Status Agent
 """
-import pytest
 from unittest.mock import MagicMock
 
+import pytest
+
+from DIRAC import S_OK, gLogger
+
 # DIRAC Components
 from DIRAC.WorkloadManagementSystem.Agent.PilotStatusAgent import PilotStatusAgent
-from DIRAC import gLogger, S_OK
 
 # Mock objects
 mockReply = MagicMock()

DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py

@@ -3,18 +3,19 @@
 
 # imports
 import os
-from pathlib import Path
 import shutil
+from collections import defaultdict
+from pathlib import Path
 from unittest.mock import Mock
+
 import pytest
-
+
+from DIRAC import S_ERROR, S_OK, gLogger
 
 # DIRAC Components
 from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
 from DIRAC.WorkloadManagementSystem.Agent.PushJobAgent import PushJobAgent
 from DIRAC.WorkloadManagementSystem.Agent.test.Test_Agent_SiteDirector import config
-
-from DIRAC import gLogger, S_OK, S_ERROR
 from DIRAC.WorkloadManagementSystem.Client import JobMinorStatus
 from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport
 

DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py

@@ -1,11 +1,13 @@
 """ Test class for Stalled Job Agent
 """
-import pytest
 from unittest.mock import MagicMock
 
+import pytest
+
+from DIRAC import gLogger
+
 # DIRAC Components
 from DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent import StalledJobAgent
-from DIRAC import gLogger
 
 # Mock Objects
 mockAM = MagicMock()

DIRAC/WorkloadManagementSystem/Client/JobReport.py

@@ -115,16 +115,20 @@ class JobReport:
 
     def commit(self):
         """Send all the accumulated information"""
+        messages = []
 
-        success = True
         result = self.sendStoredStatusInfo()
-
+        if not result["OK"]:
+            messages.append(result["Message"])
         result = self.sendStoredJobParameters()
-
+        if not result["OK"]:
+            messages.append(result["Message"])
 
-        if
-
-
+        if messages:
+            gLogger.warn("Some information could not be uploaded to JobStateUpdate service:", "; ".join(messages))
+            return S_ERROR("Information upload to JobStateUpdate service failed")
+
+        return S_OK()
 
     def dump(self):
         """Print out the contents of the internal cached information"""

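The reworked commit() follows DIRAC's standard return-value convention, which every `result["OK"]` check in these diffs relies on: S_OK wraps a value, S_ERROR wraps a message. A small sketch of the convention itself:

    from DIRAC import S_ERROR, S_OK

    ok = S_OK(42)
    assert ok["OK"] is True and ok["Value"] == 42

    err = S_ERROR("upload failed")
    assert err["OK"] is False and err["Message"] == "upload failed"

With the new code, a failed commit() now aggregates every failed upload into one S_ERROR instead of silently tracking a boolean.
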
DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py

@@ -1,11 +1,16 @@
-"""
-
+"""This object is a wrapper for setting and getting jobs states"""
+
 from DIRAC import S_ERROR, S_OK, gLogger
 from DIRAC.WorkloadManagementSystem.Client import JobStatus
 from DIRAC.WorkloadManagementSystem.Client.JobState.JobManifest import JobManifest
 from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
 from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
-from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import
+from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import (
+    TaskQueueDB,
+    multiValueDefFields,
+    singleValueDefFields,
+    rangeValueDefFields,
+)
 from DIRAC.WorkloadManagementSystem.Service.JobPolicy import (
     RIGHT_CHANGE_STATUS,
     RIGHT_GET_INFO,
@@ -351,6 +356,10 @@ class JobState:
             if name in reqCfg:
                 jobReqDict[name] = reqCfg.getOption(name, [])
 
+        for name in rangeValueDefFields:
+            if name in reqCfg:
+                jobReqDict[name] = int(reqCfg[name])
+
         jobPriority = reqCfg.getOption("UserPriority", 1)
 
         result = self.__retryFunction(2, JobState.__db.tqDB.insertJob, (self.__jid, jobReqDict, jobPriority))

DIRAC/WorkloadManagementSystem/Client/Matcher.py

@@ -1,7 +1,8 @@
-"""
+"""Encapsulate here the logic for matching jobs
 
-
+Utilities and classes here are used by MatcherHandler
 """
+
 import time
 
 from DIRAC import convertToPy3VersionNumber, gLogger
@@ -16,7 +17,11 @@ from DIRAC.WorkloadManagementSystem.Client.Limiter import Limiter
 from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
 from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
 from DIRAC.WorkloadManagementSystem.DB.PilotAgentsDB import PilotAgentsDB
-from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import
+from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import (
+    TaskQueueDB,
+    multiValueMatchFields,
+    singleValueDefFields,
+)
 
 
 class PilotVersionError(Exception):
@@ -68,14 +73,14 @@ class Matcher:
 
         # Make a nice print of the resource matching parameters
         toPrintDict = dict(resourceDict)
-        if "MaxRAM" in
-            toPrintDict["MaxRAM"] =
+        if "MaxRAM" in resourceDict:
+            toPrintDict["MaxRAM"] = resourceDict["MaxRAM"]
         if "NumberOfProcessors" in resourceDescription:
             toPrintDict["NumberOfProcessors"] = resourceDescription["NumberOfProcessors"]
         toPrintDict["Tag"] = []
         if "Tag" in resourceDict:
             for tag in resourceDict["Tag"]:
-                if not tag.endswith("
+                if not tag.endswith("MB") and not tag.endswith("Processors"):
                     toPrintDict["Tag"].append(tag)
         if not toPrintDict["Tag"]:
             toPrintDict.pop("Tag")
@@ -166,11 +171,7 @@ class Matcher:
         """
 
         resourceDict = {}
-        for name in singleValueDefFields:
-            if name in resourceDescription:
-                resourceDict[name] = resourceDescription[name]
-
-        for name in multiValueMatchFields:
+        for name in singleValueDefFields + multiValueMatchFields + ("MaxRAM",):
             if name in resourceDescription:
                 resourceDict[name] = resourceDescription[name]
 
@@ -191,25 +192,18 @@ class Matcher:
         if "JobID" in resourceDescription:
             resourceDict["JobID"] = resourceDescription["JobID"]
 
-        # Convert
-        maxRAM = resourceDescription.get("MaxRAM")
-        if maxRAM:
-            try:
-                maxRAM = int(maxRAM / 1000)
-            except ValueError:
-                maxRAM = None
+        # Convert NumberOfProcessors parameters into a list of tags
         nProcessors = resourceDescription.get("NumberOfProcessors")
         if nProcessors:
             try:
                 nProcessors = int(nProcessors)
             except ValueError:
                 nProcessors = None
-
-
-
-
-
-        resourceDict.setdefault("Tag", []).extend(paramTags)
+        if nProcessors and nProcessors <= 1024:
+            paramList = list(range(1, nProcessors + 1, 1))
+            paramTags = ["%d%s" % (par, "Processors") for par in paramList]
+            if paramTags:
+                resourceDict.setdefault("Tag", []).extend(paramTags)
 
         # Add 'MultiProcessor' to the list of tags
         if nProcessors and nProcessors > 1:
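
The hunk above reworks how NumberOfProcessors is expanded into matching tags, now guarded by a 1024-processor sanity cap. As a standalone illustration of that expansion (the helper name is hypothetical):

    def processor_tags(n_processors, cap=1024):
        """Expand a processor count into cumulative task-queue matching tags,
        e.g. 3 -> ["1Processors", "2Processors", "3Processors"]."""
        if not n_processors or n_processors > cap:
            return []
        return ["%d%s" % (n, "Processors") for n in range(1, n_processors + 1)]

    assert processor_tags(3) == ["1Processors", "2Processors", "3Processors"]
    assert processor_tags(2000) == []  # beyond the sanity cap, no tags are added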
|