DIRAC 9.0.14__py3-none-any.whl → 9.0.16__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- DIRAC/ConfigurationSystem/Client/CSAPI.py +11 -0
- DIRAC/Core/Tornado/Client/private/TornadoBaseClient.py +1 -1
- DIRAC/Core/Utilities/CGroups2.py +1 -0
- DIRAC/Core/Utilities/ElasticSearchDB.py +1 -1
- DIRAC/Core/Utilities/MySQL.py +51 -25
- DIRAC/DataManagementSystem/Client/DataManager.py +7 -10
- DIRAC/DataManagementSystem/Client/FTS3Job.py +12 -3
- DIRAC/FrameworkSystem/Service/SystemAdministratorHandler.py +41 -11
- DIRAC/Interfaces/API/Dirac.py +12 -4
- DIRAC/Interfaces/API/Job.py +62 -17
- DIRAC/RequestManagementSystem/private/RequestTask.py +2 -1
- DIRAC/Resources/Catalog/FileCatalogClient.py +18 -7
- DIRAC/Resources/Catalog/Utilities.py +3 -3
- DIRAC/Resources/Computing/BatchSystems/SLURM.py +1 -1
- DIRAC/Resources/Computing/BatchSystems/TimeLeft/TimeLeft.py +3 -1
- DIRAC/Resources/Computing/ComputingElement.py +39 -34
- DIRAC/Resources/Computing/InProcessComputingElement.py +20 -7
- DIRAC/Resources/Computing/PoolComputingElement.py +76 -37
- DIRAC/Resources/Computing/SingularityComputingElement.py +19 -9
- DIRAC/Resources/Computing/test/Test_InProcessComputingElement.py +69 -8
- DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +102 -35
- DIRAC/Resources/Storage/GFAL2_StorageBase.py +9 -0
- DIRAC/TransformationSystem/Agent/TransformationAgent.py +12 -13
- DIRAC/WorkloadManagementSystem/Client/JobReport.py +10 -6
- DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py +12 -3
- DIRAC/WorkloadManagementSystem/Client/Matcher.py +18 -24
- DIRAC/WorkloadManagementSystem/DB/TaskQueueDB.py +137 -7
- DIRAC/WorkloadManagementSystem/Executor/JobScheduling.py +8 -14
- DIRAC/WorkloadManagementSystem/Executor/test/Test_Executor.py +3 -5
- DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +2 -2
- DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +1 -1
- DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py +7 -1
- DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +81 -2
- DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py +7 -6
- DIRAC/WorkloadManagementSystem/Utilities/QueueUtilities.py +5 -5
- DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +2 -1
- DIRAC/WorkloadManagementSystem/Utilities/Utils.py +21 -4
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_RemoteRunner.py +7 -3
- DIRAC/WorkloadManagementSystem/scripts/dirac_wms_get_wn_parameters.py +3 -3
- DIRAC/__init__.py +1 -1
- DIRAC/tests/Utilities/testJobDefinitions.py +57 -20
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/METADATA +2 -2
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/RECORD +47 -47
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/WHEEL +0 -0
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/entry_points.txt +0 -0
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/licenses/LICENSE +0 -0
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/top_level.txt +0 -0
DIRAC/Resources/Computing/test/Test_PoolComputingElement.py

@@ -2,6 +2,7 @@
 """
 tests for PoolComputingElement module
 """
+
 import os
 import time
 
@@ -50,7 +51,7 @@ def _stopJob(nJob):
 
 @pytest.fixture
 def createAndDelete():
-    for i in range(
+    for i in range(9):
         with open(f"testPoolCEJob_{i}.py", "w") as execFile:
             execFile.write(jobScript % i)
         os.chmod(f"testPoolCEJob_{i}.py", 0o755)
@@ -66,24 +67,28 @@ def createAndDelete():
     time.sleep(0.5)
 
     # stopping the jobs
-    for i in range(
+    for i in range(9):
        _stopJob(i)
 
    # removing testPoolCEJob files
    # this will also stop the futures unless they are already stopped!
-    for i in range(
+    for i in range(9):
        try:
            os.remove(f"testPoolCEJob_{i}.py")
-            os.remove("testBadPoolCEJob.py")
        except OSError:
            pass
 
+    try:
+        os.remove("testBadPoolCEJob.py")
+    except OSError:
+        pass
+
 
 @pytest.mark.slow
 def test_submit_and_shutdown(createAndDelete):
     time.sleep(0.5)
 
-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4}
+    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4, "MaxRAM": 3800}
     ce = PoolComputingElement("TestPoolCE")
     ce.setParameters(ceParameters)
 
@@ -145,7 +150,7 @@ def test_executeJob_wholeNode4(createAndDelete):
     time.sleep(0.5)
     taskIDs = {}
 
-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4}
+    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4, "MaxRAM": 16000}
     ce = PoolComputingElement("TestPoolCE")
     ce.setParameters(ceParameters)
 
@@ -159,9 +164,11 @@ def test_executeJob_wholeNode4(createAndDelete):
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 1
     assert result["AvailableProcessors"] == 3
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == 16000
     assert result["RunningJobs"] == 1
 
-    jobParams = {"mpTag": True, "numberOfProcessors": 2}
+    jobParams = {"mpTag": True, "numberOfProcessors": 2, "MaxRAM": 4000}
     result = ce.submitJob("testPoolCEJob_1.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
@@ -171,6 +178,9 @@ def test_executeJob_wholeNode4(createAndDelete):
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 3
     assert result["AvailableProcessors"] == 1
+    assert result["UsedRAM"] == 4000
+    assert result["AvailableRAM"] == 12000
+
     assert result["RunningJobs"] == 2
 
     # now trying again would fail
@@ -190,13 +200,20 @@ def test_executeJob_wholeNode4(createAndDelete):
 
 
 @pytest.mark.slow
-def test_executeJob_wholeNode8(createAndDelete):
+@pytest.mark.parametrize(
+    "ce_parameters",
+    [
+        ({"NumberOfProcessors": 8}),
+        ({"NumberOfProcessors": 8, "MaxRAM": 32000}),
+        ({"WholeNode": True, "NumberOfProcessors": 8, "MaxRAM": 32000}),
+    ],
+)
+def test_executeJob_wholeNode8(createAndDelete, ce_parameters):
     time.sleep(0.5)
     taskIDs = {}
 
-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 8}
     ce = PoolComputingElement("TestPoolCE")
-    ce.setParameters(
+    ce.setParameters(ce_parameters)
 
     jobParams = {"mpTag": True, "numberOfProcessors": 2, "maxNumberOfProcessors": 2}
     result = ce.submitJob("testPoolCEJob_2.py", None, **jobParams)
@@ -207,6 +224,8 @@ def test_executeJob_wholeNode8(createAndDelete):
 
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 2
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == ce_parameters.get("MaxRAM", 0)
 
     jobParams = {"mpTag": True, "numberOfProcessors": 1, "maxNumberOfProcessors": 3}
     result = ce.submitJob("testPoolCEJob_3.py", None, **jobParams)
@@ -217,8 +236,10 @@ def test_executeJob_wholeNode8(createAndDelete):
 
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 5
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == ce_parameters.get("MaxRAM", 0)
 
-    jobParams = {"numberOfProcessors": 2}  # This is same as asking for SP
+    jobParams = {"numberOfProcessors": 2, "MinRAM": 4000, "MaxRAM": 8000}  # This is same as asking for SP
     result = ce.submitJob("testPoolCEJob_4.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
@@ -227,39 +248,72 @@ def test_executeJob_wholeNode8(createAndDelete):
 
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 6
+    assert result["UsedRAM"] == 8000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )
 
-    #
-    jobParams = {"mpTag": True, "numberOfProcessors": 3}
+    jobParams = {"MinRAM": 8000, "MaxRAM": 8000}  # This is same as asking for SP
     result = ce.submitJob("testPoolCEJob_5.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
     assert taskID == 3
+    taskIDs[taskID] = True
+
+    result = ce.getCEStatus()
+    assert result["UsedProcessors"] == 7
+    assert result["UsedRAM"] == 16000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )
+
+    jobParams = {"MaxRAM": 24000}  # This will fail for the case when the ce have set a RAM
+    result = ce.submitJob("testPoolCEJob_6.py", None, **jobParams)
+    assert result["OK"] is True
+    taskID = result["Value"]
+    assert taskID == 4
+    if ce_parameters.get("MaxRAM"):
+        assert ce.taskResults[taskID]["OK"] is False
+
+    result = ce.getCEStatus()
+    assert result["UsedProcessors"] == 7 if ce_parameters.get("MaxRAM") else 8
+    assert result["UsedRAM"] == 16000 if ce_parameters.get("MaxRAM") else 40000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )
+
+    # now trying again would fail
+    jobParams = {"mpTag": True, "numberOfProcessors": 3}
+    result = ce.submitJob("testPoolCEJob_7.py", None, **jobParams)
+    assert result["OK"] is True
+    taskID = result["Value"]
+    assert taskID == 5
     taskIDs[taskID] = False
 
     # waiting and submit again
     while len(ce.taskResults) < 2:
         time.sleep(0.1)
 
-    jobParams = {"mpTag": True, "numberOfProcessors":
-    result = ce.submitJob("
+    jobParams = {"mpTag": True, "numberOfProcessors": 1}
+    result = ce.submitJob("testPoolCEJob_8.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
-    assert taskID ==
+    assert taskID == 6
     taskIDs[taskID] = True
 
     result = ce.shutdown()
     assert result["OK"] is True
     assert isinstance(result["Value"], dict)
-    assert len(result["Value"]) ==
+    assert len(result["Value"]) == 7
 
-    while len(ce.taskResults) <
+    while len(ce.taskResults) < 7:
         time.sleep(0.1)
 
     for taskID, expectedResult in taskIDs.items():
         submissionResult = ce.taskResults[taskID]
         assert submissionResult["OK"] is expectedResult
         if not submissionResult["OK"]:
-            assert "Not enough processors"
+            assert submissionResult["Message"] in ["Not enough processors for the job", "Not enough memory for the job"]
 
 
 @pytest.mark.slow
@@ -372,28 +426,41 @@ def test_executeJob_WholeNodeJobs(createAndDelete):
 
 
 @pytest.mark.parametrize(
-    "processorsPerTask, kwargs,
+    "processorsPerTask, ramPerTask, kwargs, expected_processors, expected_memory",
     [
-        (None, {}, 1),
-        (None, {"mpTag": False}, 1),
-        (None, {"mpTag": True}, 1),
-        (None, {"mpTag": True, "wholeNode": True}, 16),
-        (None, {"mpTag": True, "wholeNode": False}, 1),
-        (None, {"mpTag": True, "numberOfProcessors": 4}, 4),
-        (None, {"mpTag": True, "numberOfProcessors": 4, "
-        (None, {"mpTag": True, "numberOfProcessors": 4, "
-        (
-        (
-        (
-        ({1: 4}, {"mpTag": True, "
-        ({1: 4}, {"mpTag": True, "
+        (None, None, {}, 1, 0),
+        (None, None, {"mpTag": False}, 1, 0),
+        (None, None, {"mpTag": True, "MaxRAM": 8000}, 1, 8000),
+        (None, None, {"mpTag": True, "wholeNode": True}, 16, 0),
+        (None, None, {"mpTag": True, "wholeNode": False}, 1, 0),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MinRAM": 2000}, 4, 2000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MaxRAM": 4000}, 4, 4000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MaxRAM": 36000}, 4, None),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MinRAM": 2000, "MaxRAM": 4000}, 4, 4000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "maxNumberOfProcessors": 8}, 8, 0),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "maxNumberOfProcessors": 32}, 16, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "wholeNode": True}, 0, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "wholeNode": False}, 1, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2, "MinRAM": 8000}, 2, 8000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 16, "MinRAM": 8000, "MaxRAM": 12000}, 0, 12000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "maxNumberOfProcessors": 2, "MaxRAM": 16000}, 2, 16000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2, "MaxRAM": 8000}, 2, 8000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "maxNumberOfProcessors": 16, "MaxRAM": 32000}, 12, None),
+        ({1: 4, 2: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2}, 2, 0),
+        ({1: 4, 2: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 4}, 4, 0),
+        ({1: 4, 2: 8, 3: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 4}, 0, 0),
     ],
 )
-def 
+def test__getLimitsForJobs(processorsPerTask, ramPerTask, kwargs, expected_processors, expected_memory):
     ce = PoolComputingElement("TestPoolCE")
     ce.processors = 16
+    ce.ram = 32000
 
     if processorsPerTask:
         ce.processorsPerTask = processorsPerTask
+    if ramPerTask:
+        ce.ramPerTask = ramPerTask
     res = ce._getProcessorsForJobs(kwargs)
-    assert res ==
+    assert res == expected_processors
+    res = ce._getMemoryForJobs(kwargs)
+    assert res == expected_memory
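The new assertions exercise RAM bookkeeping on the pool CE (ce.ram, ce.ramPerTask, _getMemoryForJobs, and the "Not enough memory for the job" rejection), but the implementation itself is not part of this excerpt. The following is only a minimal sketch, consistent with the expectations encoded in the parametrized test above, of how such a helper could behave; everything except the attribute and method names taken from the tests is an assumption.

class MemoryAwarePoolCESketch:
    """Hypothetical stand-in, NOT the DIRAC PoolComputingElement implementation."""

    def __init__(self, ram=0):
        self.ram = ram        # total RAM advertised by the CE (MB); 0 means no limit is enforced
        self.ramPerTask = {}  # taskID -> MB reserved by a running task

    def _getMemoryForJobs(self, kwargs):
        """Return the MB to reserve for a job: 0 if the job asks for none,
        None if the request cannot fit in the remaining RAM."""
        requested = kwargs.get("MaxRAM") or kwargs.get("MinRAM") or 0
        if not requested:
            return 0
        if not self.ram:
            # No CE-level limit configured: record the request but never reject on memory
            return requested
        available = self.ram - sum(self.ramPerTask.values())
        return requested if requested <= available else None


# The sketch reproduces the memory expectations of the parametrized test above
ce = MemoryAwarePoolCESketch(ram=32000)
ce.ramPerTask = {1: 4000}
assert ce._getMemoryForJobs({"MinRAM": 8000, "MaxRAM": 12000}) == 12000
assert ce._getMemoryForJobs({"maxNumberOfProcessors": 16, "MaxRAM": 32000}) is None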
DIRAC/Resources/Storage/GFAL2_StorageBase.py

@@ -53,6 +53,9 @@ except AttributeError:
 MAX_SINGLE_STREAM_SIZE = 1024 * 1024 * 10  # 10MB
 MIN_BANDWIDTH = 0.5 * (1024 * 1024)  # 0.5 MB/s
 
+# Default timeout for any stat like call
+DEFAULT_OPERATION_TIMEOUT = 10
+
 
 @contextmanager
 def setGfalSetting(
@@ -169,6 +172,12 @@ class GFAL2_StorageBase(StorageBase):
         # It is only useful for TPC
         self.ctx.set_opt_boolean("HTTP PLUGIN", "RETRIEVE_BEARER_TOKEN", False)
 
+        # Set a global timeout for the operations
+        self.ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", DEFAULT_OPERATION_TIMEOUT)
+        # Because HTTP Plugin does not read the CORE:NAMESPACE_TIMEOUT as it should
+        # I also specify it here
+        self.ctx.set_opt_integer("HTTP PLUGIN", "OPERATION_TIMEOUT", DEFAULT_OPERATION_TIMEOUT)
+
         # spaceToken used for copying from and to the storage element
         self.spaceToken = parameters.get("SpaceToken", "")
         # stageTimeout, default timeout to try and stage/pin a file
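For context, the same options can be set on a bare gfal2 context outside DIRAC. A minimal sketch, assuming the gfal2 Python bindings are installed and using a placeholder URL:

import gfal2

ctx = gfal2.creat_context()
# Cap namespace (stat-like) operations at 10 seconds, mirroring the change above
ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", 10)
# The HTTP plugin reads its own timeout option, so set it explicitly as well
ctx.set_opt_integer("HTTP PLUGIN", "OPERATION_TIMEOUT", 10)

# Any stat-like call made through this context is now bounded by the timeout
info = ctx.stat("https://some-storage.example/path/to/file")  # placeholder URL
print(info.st_size)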
DIRAC/TransformationSystem/Agent/TransformationAgent.py

@@ -500,19 +500,18 @@ class TransformationAgent(AgentModule, TransformationAgentsUtilities):
         startTime = time.time()
         self._logInfo(f"Getting replicas for {len(newLFNs)} files from catalog", method=method, transID=transID)
         newReplicas = {}
-        ...
-            )
+        res = self._getDataReplicasDM(transID, newLFNs, clients, forJobs=forJobs)
+        if res["OK"]:
+            newReplicas = {lfn: ses for lfn, ses in res["Value"].items() if ses}
+
+            self.__updateCache(transID, newReplicas)
+        else:
+            self._logWarn(
+                f"Failed to get replicas for {len(newLFNs)} files",
+                res["Message"],
+                method=method,
+                transID=transID,
+            )
 
         self._logInfo(
             f"Obtained {len(newReplicas)} replicas from catalog in {time.time() - startTime:.1f} seconds",
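The new code keeps only LFNs that actually have replicas before updating the cache; the dict comprehension behaves as in this small standalone example:

replicas = {"/lfn/a": ["SE1", "SE2"], "/lfn/b": []}
newReplicas = {lfn: ses for lfn, ses in replicas.items() if ses}
# newReplicas == {"/lfn/a": ["SE1", "SE2"]}: LFNs with an empty SE list are dropped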
DIRAC/WorkloadManagementSystem/Client/JobReport.py

@@ -115,16 +115,20 @@ class JobReport:
 
     def commit(self):
         """Send all the accumulated information"""
+        messages = []
 
-        success = True
         result = self.sendStoredStatusInfo()
-        ...
+        if not result["OK"]:
+            messages.append(result["Message"])
         result = self.sendStoredJobParameters()
-        ...
+        if not result["OK"]:
+            messages.append(result["Message"])
 
-        if
-        ...
+        if messages:
+            gLogger.warn("Some information could not be uploaded to JobStateUpdate service:", "; ".join(messages))
+            return S_ERROR("Information upload to JobStateUpdate service failed")
+
+        return S_OK()
 
     def dump(self):
         """Print out the contents of the internal cached information"""
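The return values follow the usual DIRAC convention: S_OK() yields a dict with "OK": True, while S_ERROR(msg) yields "OK": False plus a "Message". A caller of the reworked commit() would therefore check it as in this small sketch (the jobReport variable is assumed to be an existing JobReport instance):

result = jobReport.commit()
if not result["OK"]:
    # the individual upload failures were already reported via gLogger.warn above
    print("commit failed:", result["Message"])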
DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py

@@ -1,11 +1,16 @@
-"""
-...
+"""This object is a wrapper for setting and getting jobs states"""
+
 from DIRAC import S_ERROR, S_OK, gLogger
 from DIRAC.WorkloadManagementSystem.Client import JobStatus
 from DIRAC.WorkloadManagementSystem.Client.JobState.JobManifest import JobManifest
 from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
 from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
-from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import
+from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import (
+    TaskQueueDB,
+    multiValueDefFields,
+    singleValueDefFields,
+    rangeValueDefFields,
+)
 from DIRAC.WorkloadManagementSystem.Service.JobPolicy import (
     RIGHT_CHANGE_STATUS,
     RIGHT_GET_INFO,
@@ -351,6 +356,10 @@ class JobState:
             if name in reqCfg:
                 jobReqDict[name] = reqCfg.getOption(name, [])
 
+        for name in rangeValueDefFields:
+            if name in reqCfg:
+                jobReqDict[name] = int(reqCfg[name])
+
         jobPriority = reqCfg.getOption("UserPriority", 1)
 
         result = self.__retryFunction(2, JobState.__db.tqDB.insertJob, (self.__jid, jobReqDict, jobPriority))
DIRAC/WorkloadManagementSystem/Client/Matcher.py

@@ -1,7 +1,8 @@
-"""
+"""Encapsulate here the logic for matching jobs
 
-...
+Utilities and classes here are used by MatcherHandler
 """
+
 import time
 
 from DIRAC import convertToPy3VersionNumber, gLogger
@@ -16,7 +17,11 @@ from DIRAC.WorkloadManagementSystem.Client.Limiter import Limiter
 from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
 from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
 from DIRAC.WorkloadManagementSystem.DB.PilotAgentsDB import PilotAgentsDB
-from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import
+from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import (
+    TaskQueueDB,
+    multiValueMatchFields,
+    singleValueDefFields,
+)
 
 
 class PilotVersionError(Exception):
@@ -68,14 +73,14 @@ class Matcher:
 
         # Make a nice print of the resource matching parameters
         toPrintDict = dict(resourceDict)
-        if "MaxRAM" in
-            toPrintDict["MaxRAM"] =
+        if "MaxRAM" in resourceDict:
+            toPrintDict["MaxRAM"] = resourceDict["MaxRAM"]
         if "NumberOfProcessors" in resourceDescription:
             toPrintDict["NumberOfProcessors"] = resourceDescription["NumberOfProcessors"]
         toPrintDict["Tag"] = []
         if "Tag" in resourceDict:
             for tag in resourceDict["Tag"]:
-                if not tag.endswith("
+                if not tag.endswith("MB") and not tag.endswith("Processors"):
                     toPrintDict["Tag"].append(tag)
         if not toPrintDict["Tag"]:
             toPrintDict.pop("Tag")
@@ -166,11 +171,7 @@
         """
 
         resourceDict = {}
-        for name in singleValueDefFields:
-            if name in resourceDescription:
-                resourceDict[name] = resourceDescription[name]
-
-        for name in multiValueMatchFields:
+        for name in singleValueDefFields + multiValueMatchFields + ("MaxRAM",):
             if name in resourceDescription:
                 resourceDict[name] = resourceDescription[name]
 
@@ -191,25 +192,18 @@
         if "JobID" in resourceDescription:
             resourceDict["JobID"] = resourceDescription["JobID"]
 
-        # Convert
-        maxRAM = resourceDescription.get("MaxRAM")
-        if maxRAM:
-            try:
-                maxRAM = int(maxRAM / 1000)
-            except ValueError:
-                maxRAM = None
+        # Convert NumberOfProcessors parameters into a list of tags
         nProcessors = resourceDescription.get("NumberOfProcessors")
         if nProcessors:
             try:
                 nProcessors = int(nProcessors)
             except ValueError:
                 nProcessors = None
-        ...
-            resourceDict.setdefault("Tag", []).extend(paramTags)
+        if nProcessors and nProcessors <= 1024:
+            paramList = list(range(1, nProcessors + 1, 1))
+            paramTags = ["%d%s" % (par, "Processors") for par in paramList]
+            if paramTags:
+                resourceDict.setdefault("Tag", []).extend(paramTags)
 
         # Add 'MultiProcessor' to the list of tags
         if nProcessors and nProcessors > 1: