DIRAC 9.0.13__py3-none-any.whl → 9.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. DIRAC/ConfigurationSystem/Client/CSAPI.py +11 -0
  2. DIRAC/Core/Utilities/CGroups2.py +1 -0
  3. DIRAC/Core/Utilities/ElasticSearchDB.py +1 -1
  4. DIRAC/Core/Utilities/MySQL.py +51 -25
  5. DIRAC/DataManagementSystem/Client/DataManager.py +7 -10
  6. DIRAC/DataManagementSystem/Client/FTS3Job.py +12 -3
  7. DIRAC/FrameworkSystem/Service/SystemAdministratorHandler.py +41 -11
  8. DIRAC/Interfaces/API/Dirac.py +12 -4
  9. DIRAC/Interfaces/API/Job.py +62 -17
  10. DIRAC/RequestManagementSystem/private/RequestTask.py +2 -1
  11. DIRAC/Resources/Catalog/FileCatalogClient.py +18 -7
  12. DIRAC/Resources/Catalog/Utilities.py +3 -3
  13. DIRAC/Resources/Computing/BatchSystems/SLURM.py +1 -1
  14. DIRAC/Resources/Computing/BatchSystems/TimeLeft/TimeLeft.py +3 -1
  15. DIRAC/Resources/Computing/ComputingElement.py +39 -34
  16. DIRAC/Resources/Computing/InProcessComputingElement.py +20 -7
  17. DIRAC/Resources/Computing/PoolComputingElement.py +76 -37
  18. DIRAC/Resources/Computing/SingularityComputingElement.py +19 -9
  19. DIRAC/Resources/Computing/test/Test_InProcessComputingElement.py +69 -8
  20. DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +102 -35
  21. DIRAC/Resources/Storage/GFAL2_StorageBase.py +9 -0
  22. DIRAC/TransformationSystem/Agent/TransformationAgent.py +12 -13
  23. DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py +1 -1
  24. DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py +4 -3
  25. DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py +1 -1
  26. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py +4 -3
  27. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PilotLoggingAgent.py +3 -3
  28. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PilotStatusAgent.py +4 -2
  29. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py +5 -4
  30. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py +4 -2
  31. DIRAC/WorkloadManagementSystem/Client/JobReport.py +10 -6
  32. DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py +12 -3
  33. DIRAC/WorkloadManagementSystem/Client/Matcher.py +18 -24
  34. DIRAC/WorkloadManagementSystem/DB/TaskQueueDB.py +137 -7
  35. DIRAC/WorkloadManagementSystem/Executor/JobScheduling.py +8 -14
  36. DIRAC/WorkloadManagementSystem/Executor/test/Test_Executor.py +3 -5
  37. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +4 -5
  38. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperOfflineTemplate.py +1 -1
  39. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py +1 -2
  40. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +1 -1
  41. DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +81 -2
  42. DIRAC/WorkloadManagementSystem/Utilities/QueueUtilities.py +5 -5
  43. DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +2 -1
  44. DIRAC/WorkloadManagementSystem/Utilities/test/Test_RemoteRunner.py +7 -3
  45. DIRAC/WorkloadManagementSystem/scripts/dirac_wms_get_wn_parameters.py +3 -3
  46. DIRAC/__init__.py +1 -1
  47. DIRAC/tests/Utilities/testJobDefinitions.py +57 -20
  48. {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/METADATA +2 -2
  49. {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/RECORD +53 -53
  50. {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/WHEEL +0 -0
  51. {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/entry_points.txt +0 -0
  52. {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/licenses/LICENSE +0 -0
  53. {dirac-9.0.13.dist-info → dirac-9.0.15.dist-info}/top_level.txt +0 -0
DIRAC/Resources/Computing/test/Test_PoolComputingElement.py
@@ -2,6 +2,7 @@
 """
 tests for PoolComputingElement module
 """
+
 import os
 import time

@@ -50,7 +51,7 @@ def _stopJob(nJob):

 @pytest.fixture
 def createAndDelete():
-    for i in range(6):
+    for i in range(9):
         with open(f"testPoolCEJob_{i}.py", "w") as execFile:
             execFile.write(jobScript % i)
         os.chmod(f"testPoolCEJob_{i}.py", 0o755)
@@ -66,24 +67,28 @@ def createAndDelete():
     time.sleep(0.5)

     # stopping the jobs
-    for i in range(6):
+    for i in range(9):
         _stopJob(i)

     # removing testPoolCEJob files
     # this will also stop the futures unless they are already stopped!
-    for i in range(6):
+    for i in range(9):
         try:
             os.remove(f"testPoolCEJob_{i}.py")
-            os.remove("testBadPoolCEJob.py")
         except OSError:
             pass

+    try:
+        os.remove("testBadPoolCEJob.py")
+    except OSError:
+        pass
+

 @pytest.mark.slow
 def test_submit_and_shutdown(createAndDelete):
     time.sleep(0.5)

-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4}
+    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4, "MaxRAM": 3800}
     ce = PoolComputingElement("TestPoolCE")
     ce.setParameters(ceParameters)

@@ -145,7 +150,7 @@ def test_executeJob_wholeNode4(createAndDelete):
     time.sleep(0.5)
     taskIDs = {}

-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4}
+    ceParameters = {"WholeNode": True, "NumberOfProcessors": 4, "MaxRAM": 16000}
     ce = PoolComputingElement("TestPoolCE")
     ce.setParameters(ceParameters)

@@ -159,9 +164,11 @@ def test_executeJob_wholeNode4(createAndDelete):
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 1
     assert result["AvailableProcessors"] == 3
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == 16000
     assert result["RunningJobs"] == 1

-    jobParams = {"mpTag": True, "numberOfProcessors": 2}
+    jobParams = {"mpTag": True, "numberOfProcessors": 2, "MaxRAM": 4000}
     result = ce.submitJob("testPoolCEJob_1.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
@@ -171,6 +178,9 @@ def test_executeJob_wholeNode4(createAndDelete):
     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 3
     assert result["AvailableProcessors"] == 1
+    assert result["UsedRAM"] == 4000
+    assert result["AvailableRAM"] == 12000
+
     assert result["RunningJobs"] == 2

     # now trying again would fail
@@ -190,13 +200,20 @@ def test_executeJob_wholeNode4(createAndDelete):


 @pytest.mark.slow
-def test_executeJob_wholeNode8(createAndDelete):
+@pytest.mark.parametrize(
+    "ce_parameters",
+    [
+        ({"NumberOfProcessors": 8}),
+        ({"NumberOfProcessors": 8, "MaxRAM": 32000}),
+        ({"WholeNode": True, "NumberOfProcessors": 8, "MaxRAM": 32000}),
+    ],
+)
+def test_executeJob_wholeNode8(createAndDelete, ce_parameters):
     time.sleep(0.5)
     taskIDs = {}

-    ceParameters = {"WholeNode": True, "NumberOfProcessors": 8}
     ce = PoolComputingElement("TestPoolCE")
-    ce.setParameters(ceParameters)
+    ce.setParameters(ce_parameters)

     jobParams = {"mpTag": True, "numberOfProcessors": 2, "maxNumberOfProcessors": 2}
     result = ce.submitJob("testPoolCEJob_2.py", None, **jobParams)
@@ -207,6 +224,8 @@ def test_executeJob_wholeNode8(createAndDelete):

     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 2
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == ce_parameters.get("MaxRAM", 0)

     jobParams = {"mpTag": True, "numberOfProcessors": 1, "maxNumberOfProcessors": 3}
     result = ce.submitJob("testPoolCEJob_3.py", None, **jobParams)
@@ -217,8 +236,10 @@ def test_executeJob_wholeNode8(createAndDelete):

     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 5
+    assert result["UsedRAM"] == 0
+    assert result["AvailableRAM"] == ce_parameters.get("MaxRAM", 0)

-    jobParams = {"numberOfProcessors": 2}  # This is same as asking for SP
+    jobParams = {"numberOfProcessors": 2, "MinRAM": 4000, "MaxRAM": 8000}  # This is same as asking for SP
     result = ce.submitJob("testPoolCEJob_4.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
@@ -227,39 +248,72 @@ def test_executeJob_wholeNode8(createAndDelete):

     result = ce.getCEStatus()
     assert result["UsedProcessors"] == 6
+    assert result["UsedRAM"] == 8000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )

-    # now trying again would fail
-    jobParams = {"mpTag": True, "numberOfProcessors": 3}
+    jobParams = {"MinRAM": 8000, "MaxRAM": 8000}  # This is same as asking for SP
     result = ce.submitJob("testPoolCEJob_5.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
     assert taskID == 3
+    taskIDs[taskID] = True
+
+    result = ce.getCEStatus()
+    assert result["UsedProcessors"] == 7
+    assert result["UsedRAM"] == 16000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )
+
+    jobParams = {"MaxRAM": 24000}  # This will fail for the case when the ce have set a RAM
+    result = ce.submitJob("testPoolCEJob_6.py", None, **jobParams)
+    assert result["OK"] is True
+    taskID = result["Value"]
+    assert taskID == 4
+    if ce_parameters.get("MaxRAM"):
+        assert ce.taskResults[taskID]["OK"] is False
+
+    result = ce.getCEStatus()
+    assert result["UsedProcessors"] == 7 if ce_parameters.get("MaxRAM") else 8
+    assert result["UsedRAM"] == 16000 if ce_parameters.get("MaxRAM") else 40000
+    assert result["AvailableRAM"] == (
+        ce_parameters.get("MaxRAM") - result["UsedRAM"] if ce_parameters.get("MaxRAM") else 0
+    )
+
+    # now trying again would fail
+    jobParams = {"mpTag": True, "numberOfProcessors": 3}
+    result = ce.submitJob("testPoolCEJob_7.py", None, **jobParams)
+    assert result["OK"] is True
+    taskID = result["Value"]
+    assert taskID == 5
     taskIDs[taskID] = False

     # waiting and submit again
     while len(ce.taskResults) < 2:
         time.sleep(0.1)

-    jobParams = {"mpTag": True, "numberOfProcessors": 3}
-    result = ce.submitJob("testPoolCEJob_5.py", None, **jobParams)
+    jobParams = {"mpTag": True, "numberOfProcessors": 1}
+    result = ce.submitJob("testPoolCEJob_8.py", None, **jobParams)
     assert result["OK"] is True
     taskID = result["Value"]
-    assert taskID == 4
+    assert taskID == 6
     taskIDs[taskID] = True

     result = ce.shutdown()
     assert result["OK"] is True
     assert isinstance(result["Value"], dict)
-    assert len(result["Value"]) == 5
+    assert len(result["Value"]) == 7

-    while len(ce.taskResults) < 5:
+    while len(ce.taskResults) < 7:
         time.sleep(0.1)

     for taskID, expectedResult in taskIDs.items():
         submissionResult = ce.taskResults[taskID]
         assert submissionResult["OK"] is expectedResult
         if not submissionResult["OK"]:
-            assert "Not enough processors" in submissionResult["Message"]
+            assert submissionResult["Message"] in ["Not enough processors for the job", "Not enough memory for the job"]


 @pytest.mark.slow
@@ -372,28 +426,41 @@ def test_executeJob_WholeNodeJobs(createAndDelete):


 @pytest.mark.parametrize(
-    "processorsPerTask, kwargs, expected",
+    "processorsPerTask, ramPerTask, kwargs, expected_processors, expected_memory",
     [
-        (None, {}, 1),
-        (None, {"mpTag": False}, 1),
-        (None, {"mpTag": True}, 1),
-        (None, {"mpTag": True, "wholeNode": True}, 16),
-        (None, {"mpTag": True, "wholeNode": False}, 1),
-        (None, {"mpTag": True, "numberOfProcessors": 4}, 4),
-        (None, {"mpTag": True, "numberOfProcessors": 4, "maxNumberOfProcessors": 8}, 8),
-        (None, {"mpTag": True, "numberOfProcessors": 4, "maxNumberOfProcessors": 32}, 16),
-        ({1: 4}, {"mpTag": True, "wholeNode": True}, 0),
-        ({1: 4}, {"mpTag": True, "wholeNode": False}, 1),
-        ({1: 4}, {"mpTag": True, "numberOfProcessors": 2}, 2),
-        ({1: 4}, {"mpTag": True, "maxNumberOfProcessors": 2}, 2),
-        ({1: 4}, {"mpTag": True, "maxNumberOfProcessors": 16}, 12),
+        (None, None, {}, 1, 0),
+        (None, None, {"mpTag": False}, 1, 0),
+        (None, None, {"mpTag": True, "MaxRAM": 8000}, 1, 8000),
+        (None, None, {"mpTag": True, "wholeNode": True}, 16, 0),
+        (None, None, {"mpTag": True, "wholeNode": False}, 1, 0),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MinRAM": 2000}, 4, 2000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MaxRAM": 4000}, 4, 4000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MaxRAM": 36000}, 4, None),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "MinRAM": 2000, "MaxRAM": 4000}, 4, 4000),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "maxNumberOfProcessors": 8}, 8, 0),
+        (None, None, {"mpTag": True, "numberOfProcessors": 4, "maxNumberOfProcessors": 32}, 16, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "wholeNode": True}, 0, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "wholeNode": False}, 1, 0),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2, "MinRAM": 8000}, 2, 8000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 16, "MinRAM": 8000, "MaxRAM": 12000}, 0, 12000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "maxNumberOfProcessors": 2, "MaxRAM": 16000}, 2, 16000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2, "MaxRAM": 8000}, 2, 8000),
+        ({1: 4}, {1: 4000}, {"mpTag": True, "maxNumberOfProcessors": 16, "MaxRAM": 32000}, 12, None),
+        ({1: 4, 2: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 2}, 2, 0),
+        ({1: 4, 2: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 4}, 4, 0),
+        ({1: 4, 2: 8, 3: 8}, {1: 4000}, {"mpTag": True, "numberOfProcessors": 4}, 0, 0),
     ],
 )
-def test__getProcessorsForJobs(processorsPerTask, kwargs, expected):
+def test__getLimitsForJobs(processorsPerTask, ramPerTask, kwargs, expected_processors, expected_memory):
     ce = PoolComputingElement("TestPoolCE")
     ce.processors = 16
+    ce.ram = 32000

     if processorsPerTask:
         ce.processorsPerTask = processorsPerTask
+    if ramPerTask:
+        ce.ramPerTask = ramPerTask
     res = ce._getProcessorsForJobs(kwargs)
-    assert res == expected
+    assert res == expected_processors
+    res = ce._getMemoryForJobs(kwargs)
+    assert res == expected_memory
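
The UsedRAM/AvailableRAM assertions and the _getLimitsForJobs expectations above imply a simple booking rule: a job requests MaxRAM (or, failing that, MinRAM), the request is granted if it fits in the CE's remaining RAM, and rejected (None) otherwise. The following is a minimal standalone sketch that reproduces the parametrized expectations; it is not DIRAC code, the function name is made up, and the real PoolComputingElement._getMemoryForJobs may differ.

# Illustrative only: approximate memory bookkeeping implied by the new tests.
def get_memory_for_job(kwargs, ce_ram=32000, ram_per_task=None):
    """Return the RAM (MB) to book for a job, or None if it cannot fit."""
    requested = kwargs.get("MaxRAM") or kwargs.get("MinRAM") or 0
    if not requested:
        return 0  # job declared no memory requirement
    used = sum((ram_per_task or {}).values())  # RAM already booked by running tasks
    available = ce_ram - used
    return requested if requested <= available else None


if __name__ == "__main__":
    assert get_memory_for_job({}) == 0
    assert get_memory_for_job({"MaxRAM": 8000}) == 8000
    assert get_memory_for_job({"MaxRAM": 36000}) is None
    assert get_memory_for_job({"MinRAM": 8000}, ram_per_task={1: 4000}) == 8000
    assert get_memory_for_job({"MaxRAM": 32000}, ram_per_task={1: 4000}) is None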
DIRAC/Resources/Storage/GFAL2_StorageBase.py
@@ -53,6 +53,9 @@ except AttributeError:
 MAX_SINGLE_STREAM_SIZE = 1024 * 1024 * 10  # 10MB
 MIN_BANDWIDTH = 0.5 * (1024 * 1024)  # 0.5 MB/s

+# Default timeout for any stat like call
+DEFAULT_OPERATION_TIMEOUT = 10
+

 @contextmanager
 def setGfalSetting(
@@ -169,6 +172,12 @@ class GFAL2_StorageBase(StorageBase):
         # It is only useful for TPC
         self.ctx.set_opt_boolean("HTTP PLUGIN", "RETRIEVE_BEARER_TOKEN", False)

+        # Set a global timeout for the operations
+        self.ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", DEFAULT_OPERATION_TIMEOUT)
+        # Because HTTP Plugin does not read the CORE:NAMESPACE_TIMEOUT as it should
+        # I also specify it here
+        self.ctx.set_opt_integer("HTTP PLUGIN", "OPERATION_TIMEOUT", DEFAULT_OPERATION_TIMEOUT)
+
         # spaceToken used for copying from and to the storage element
         self.spaceToken = parameters.get("SpaceToken", "")
         # stageTimeout, default timeout to try and stage/pin a file
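
For readers unfamiliar with gfal2, the two set_opt_integer calls above act on a gfal2 context; the following is a minimal sketch outside DIRAC, assuming the gfal2 Python bindings are available (the example URL is hypothetical).

import gfal2

ctx = gfal2.creat_context()  # the bindings really do spell it "creat_context"
# Namespace (stat-like) operations on this context time out after 10 seconds...
ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", 10)
# ...and the HTTP plugin gets the same limit explicitly, since it does not
# honour CORE:NAMESPACE_TIMEOUT on its own.
ctx.set_opt_integer("HTTP PLUGIN", "OPERATION_TIMEOUT", 10)

# Any stat-like call made through this context is now bounded by the timeout,
# e.g. ctx.stat("https://some-storage.example/path/to/file")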
DIRAC/TransformationSystem/Agent/TransformationAgent.py
@@ -500,19 +500,18 @@ class TransformationAgent(AgentModule, TransformationAgentsUtilities):
         startTime = time.time()
         self._logInfo(f"Getting replicas for {len(newLFNs)} files from catalog", method=method, transID=transID)
         newReplicas = {}
-        for chunk in breakListIntoChunks(newLFNs, 10000):
-            res = self._getDataReplicasDM(transID, chunk, clients, forJobs=forJobs)
-            if res["OK"]:
-                reps = {lfn: ses for lfn, ses in res["Value"].items() if ses}
-                newReplicas.update(reps)
-                self.__updateCache(transID, reps)
-            else:
-                self._logWarn(
-                    f"Failed to get replicas for {len(chunk)} files",
-                    res["Message"],
-                    method=method,
-                    transID=transID,
-                )
+        res = self._getDataReplicasDM(transID, newLFNs, clients, forJobs=forJobs)
+        if res["OK"]:
+            newReplicas = {lfn: ses for lfn, ses in res["Value"].items() if ses}
+
+            self.__updateCache(transID, newReplicas)
+        else:
+            self._logWarn(
+                f"Failed to get replicas for {len(newLFNs)} files",
+                res["Message"],
+                method=method,
+                transID=transID,
+            )

         self._logInfo(
             f"Obtained {len(newReplicas)} replicas from catalog in {time.time() - startTime:.1f} seconds",
DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py
@@ -38,8 +38,8 @@ from DIRAC.WorkloadManagementSystem.Client import JobStatus
 from DIRAC.WorkloadManagementSystem.Client.WMSClient import WMSClient
 from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
 from DIRAC.WorkloadManagementSystem.DB.SandboxMetadataDB import SandboxMetadataDB
-from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_DELETE
 from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
+from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_DELETE
 from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters


DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py
@@ -8,15 +8,16 @@

 """

-import os
+import hashlib
 import json
+import os
 import shutil
-import hashlib
+
 import requests

 from DIRAC import S_OK
 from DIRAC.Core.Base.AgentModule import AgentModule
-from DIRAC.Core.Security.Locations import getHostCertificateAndKeyLocation, getCAsLocation
+from DIRAC.Core.Security.Locations import getCAsLocation, getHostCertificateAndKeyLocation
 from DIRAC.DataManagementSystem.Client.DataManager import DataManager
 from DIRAC.WorkloadManagementSystem.Utilities.PilotCStoJSONSynchronizer import PilotCStoJSONSynchronizer

DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py
@@ -20,8 +20,8 @@ from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
 from DIRAC.Core.Utilities.ObjectLoader import ObjectLoader
 from DIRAC.Core.Utilities.TimeUtilities import fromString, second, toEpoch
 from DIRAC.WorkloadManagementSystem.Client import JobMinorStatus, JobStatus
-from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_KILL
 from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
+from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_KILL
 from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters
 from DIRAC.WorkloadManagementSystem.Utilities.Utils import rescheduleJobs

DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py
@@ -2,14 +2,15 @@
 """
 import multiprocessing
 import os
-from pathlib import Path
-import pytest
 import time
 from concurrent.futures import ProcessPoolExecutor
 from functools import partial
+from pathlib import Path

-from DIRAC import gLogger, S_OK, S_ERROR
+import pytest
 from DIRAC.Core.Security.X509Chain import X509Chain  # pylint: disable=import-error
+
+from DIRAC import S_ERROR, S_OK, gLogger
 from DIRAC.Resources.Computing.BatchSystems.TimeLeft.TimeLeft import TimeLeft
 from DIRAC.Resources.Computing.ComputingElementFactory import ComputingElementFactory
 from DIRAC.Resources.Computing.test.Test_PoolComputingElement import badJobScript, jobScript
DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PilotLoggingAgent.py
@@ -1,16 +1,16 @@
 """ Test class for PilotLoggingAgent Agent
 """
 import os
-import time
 import tempfile
+import time
+from unittest.mock import MagicMock, patch

 import pytest
-from unittest.mock import MagicMock, patch

 # DIRAC Components
 import DIRAC.WorkloadManagementSystem.Agent.PilotLoggingAgent as plaModule
+from DIRAC import S_ERROR, S_OK, gConfig, gLogger
 from DIRAC.WorkloadManagementSystem.Agent.PilotLoggingAgent import PilotLoggingAgent
-from DIRAC import gLogger, gConfig, S_OK, S_ERROR

 gLogger.setLevel("DEBUG")

DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PilotStatusAgent.py
@@ -1,11 +1,13 @@
 """ Test class for Pilot Status Agent
 """
-import pytest
 from unittest.mock import MagicMock

+import pytest
+
+from DIRAC import S_OK, gLogger
+
 # DIRAC Components
 from DIRAC.WorkloadManagementSystem.Agent.PilotStatusAgent import PilotStatusAgent
-from DIRAC import gLogger, S_OK

 # Mock objects
 mockReply = MagicMock()
DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py
@@ -3,18 +3,19 @@

 # imports
 import os
-from pathlib import Path
 import shutil
+from collections import defaultdict
+from pathlib import Path
 from unittest.mock import Mock
+
 import pytest
-from collections import defaultdict
+
+from DIRAC import S_ERROR, S_OK, gLogger

 # DIRAC Components
 from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
 from DIRAC.WorkloadManagementSystem.Agent.PushJobAgent import PushJobAgent
 from DIRAC.WorkloadManagementSystem.Agent.test.Test_Agent_SiteDirector import config
-
-from DIRAC import gLogger, S_OK, S_ERROR
 from DIRAC.WorkloadManagementSystem.Client import JobMinorStatus
 from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport

DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py
@@ -1,11 +1,13 @@
 """ Test class for Stalled Job Agent
 """
-import pytest
 from unittest.mock import MagicMock

+import pytest
+
+from DIRAC import gLogger
+
 # DIRAC Components
 from DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent import StalledJobAgent
-from DIRAC import gLogger

 # Mock Objects
 mockAM = MagicMock()
DIRAC/WorkloadManagementSystem/Client/JobReport.py
@@ -115,16 +115,20 @@ class JobReport:

     def commit(self):
         """Send all the accumulated information"""
+        messages = []

-        success = True
         result = self.sendStoredStatusInfo()
-        success &= result["OK"]
+        if not result["OK"]:
+            messages.append(result["Message"])
         result = self.sendStoredJobParameters()
-        success &= result["OK"]
+        if not result["OK"]:
+            messages.append(result["Message"])

-        if success:
-            return S_OK()
-        return S_ERROR("Information upload to JobStateUpdate service failed")
+        if messages:
+            gLogger.warn("Some information could not be uploaded to JobStateUpdate service:", "; ".join(messages))
+            return S_ERROR("Information upload to JobStateUpdate service failed")
+
+        return S_OK()

     def dump(self):
         """Print out the contents of the internal cached information"""
DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py
@@ -1,11 +1,16 @@
-""" This object is a wrapper for setting and getting jobs states
-"""
+"""This object is a wrapper for setting and getting jobs states"""
+
 from DIRAC import S_ERROR, S_OK, gLogger
 from DIRAC.WorkloadManagementSystem.Client import JobStatus
 from DIRAC.WorkloadManagementSystem.Client.JobState.JobManifest import JobManifest
 from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
 from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
-from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import TaskQueueDB, multiValueDefFields, singleValueDefFields
+from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import (
+    TaskQueueDB,
+    multiValueDefFields,
+    singleValueDefFields,
+    rangeValueDefFields,
+)
 from DIRAC.WorkloadManagementSystem.Service.JobPolicy import (
     RIGHT_CHANGE_STATUS,
     RIGHT_GET_INFO,
@@ -351,6 +356,10 @@ class JobState:
             if name in reqCfg:
                 jobReqDict[name] = reqCfg.getOption(name, [])

+        for name in rangeValueDefFields:
+            if name in reqCfg:
+                jobReqDict[name] = int(reqCfg[name])
+
         jobPriority = reqCfg.getOption("UserPriority", 1)

         result = self.__retryFunction(2, JobState.__db.tqDB.insertJob, (self.__jid, jobReqDict, jobPriority))
DIRAC/WorkloadManagementSystem/Client/Matcher.py
@@ -1,7 +1,8 @@
-""" Encapsulate here the logic for matching jobs
+"""Encapsulate here the logic for matching jobs

-    Utilities and classes here are used by MatcherHandler
+Utilities and classes here are used by MatcherHandler
 """
+
 import time

 from DIRAC import convertToPy3VersionNumber, gLogger
@@ -16,7 +17,11 @@ from DIRAC.WorkloadManagementSystem.Client.Limiter import Limiter
 from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
 from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
 from DIRAC.WorkloadManagementSystem.DB.PilotAgentsDB import PilotAgentsDB
-from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import TaskQueueDB, multiValueMatchFields, singleValueDefFields
+from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import (
+    TaskQueueDB,
+    multiValueMatchFields,
+    singleValueDefFields,
+)


 class PilotVersionError(Exception):
@@ -68,14 +73,14 @@ class Matcher:

         # Make a nice print of the resource matching parameters
         toPrintDict = dict(resourceDict)
-        if "MaxRAM" in resourceDescription:
-            toPrintDict["MaxRAM"] = resourceDescription["MaxRAM"]
+        if "MaxRAM" in resourceDict:
+            toPrintDict["MaxRAM"] = resourceDict["MaxRAM"]
         if "NumberOfProcessors" in resourceDescription:
             toPrintDict["NumberOfProcessors"] = resourceDescription["NumberOfProcessors"]
         toPrintDict["Tag"] = []
         if "Tag" in resourceDict:
             for tag in resourceDict["Tag"]:
-                if not tag.endswith("GB") and not tag.endswith("Processors"):
+                if not tag.endswith("MB") and not tag.endswith("Processors"):
                     toPrintDict["Tag"].append(tag)
         if not toPrintDict["Tag"]:
             toPrintDict.pop("Tag")
@@ -166,11 +171,7 @@ class Matcher:
         """

         resourceDict = {}
-        for name in singleValueDefFields:
-            if name in resourceDescription:
-                resourceDict[name] = resourceDescription[name]
-
-        for name in multiValueMatchFields:
+        for name in singleValueDefFields + multiValueMatchFields + ("MaxRAM",):
             if name in resourceDescription:
                 resourceDict[name] = resourceDescription[name]

@@ -191,25 +192,18 @@ class Matcher:
         if "JobID" in resourceDescription:
             resourceDict["JobID"] = resourceDescription["JobID"]

-        # Convert MaxRAM and NumberOfProcessors parameters into a list of tags
-        maxRAM = resourceDescription.get("MaxRAM")
-        if maxRAM:
-            try:
-                maxRAM = int(maxRAM / 1000)
-            except ValueError:
-                maxRAM = None
+        # Convert NumberOfProcessors parameters into a list of tags
         nProcessors = resourceDescription.get("NumberOfProcessors")
         if nProcessors:
             try:
                 nProcessors = int(nProcessors)
             except ValueError:
                 nProcessors = None
-        for param, key in [(maxRAM, "GB"), (nProcessors, "Processors")]:
-            if param and param <= 1024:
-                paramList = list(range(2, param + 1))
-                paramTags = ["%d%s" % (par, key) for par in paramList]
-                if paramTags:
-                    resourceDict.setdefault("Tag", []).extend(paramTags)
+        if nProcessors and nProcessors <= 1024:
+            paramList = list(range(1, nProcessors + 1, 1))
+            paramTags = ["%d%s" % (par, "Processors") for par in paramList]
+            if paramTags:
+                resourceDict.setdefault("Tag", []).extend(paramTags)

         # Add 'MultiProcessor' to the list of tags
         if nProcessors and nProcessors > 1:
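
A worked example of the new tag expansion, running the same logic as the snippet above outside the Matcher class: for a pilot reporting 4 processors the tag range now starts at 1 (previously 2), and MaxRAM is no longer folded into "...GB" tags but passed through as a plain resourceDict field.

nProcessors = 4
resourceDict = {}
if nProcessors and nProcessors <= 1024:
    paramList = list(range(1, nProcessors + 1, 1))
    paramTags = ["%d%s" % (par, "Processors") for par in paramList]
    if paramTags:
        resourceDict.setdefault("Tag", []).extend(paramTags)

# The pilot can now match task queues requiring 1 to 4 processors.
assert resourceDict["Tag"] == ["1Processors", "2Processors", "3Processors", "4Processors"]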