DIRAC 9.0.0a54__py3-none-any.whl → 9.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. DIRAC/AccountingSystem/Client/AccountingCLI.py +0 -140
  2. DIRAC/AccountingSystem/Client/DataStoreClient.py +0 -13
  3. DIRAC/AccountingSystem/Client/Types/BaseAccountingType.py +0 -7
  4. DIRAC/AccountingSystem/ConfigTemplate.cfg +0 -5
  5. DIRAC/AccountingSystem/Service/DataStoreHandler.py +0 -72
  6. DIRAC/ConfigurationSystem/Client/Helpers/CSGlobals.py +0 -9
  7. DIRAC/ConfigurationSystem/Client/Helpers/Registry.py +34 -32
  8. DIRAC/ConfigurationSystem/Client/Helpers/Resources.py +11 -43
  9. DIRAC/ConfigurationSystem/Client/Helpers/test/Test_Helpers.py +0 -16
  10. DIRAC/ConfigurationSystem/Client/LocalConfiguration.py +14 -8
  11. DIRAC/ConfigurationSystem/Client/PathFinder.py +47 -8
  12. DIRAC/ConfigurationSystem/Client/SyncPlugins/CERNLDAPSyncPlugin.py +4 -1
  13. DIRAC/ConfigurationSystem/Client/VOMS2CSSynchronizer.py +9 -2
  14. DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py +41 -1
  15. DIRAC/ConfigurationSystem/private/RefresherBase.py +4 -2
  16. DIRAC/Core/DISET/ServiceReactor.py +11 -3
  17. DIRAC/Core/DISET/private/BaseClient.py +1 -2
  18. DIRAC/Core/DISET/private/Transports/M2SSLTransport.py +9 -7
  19. DIRAC/Core/Security/DiracX.py +12 -7
  20. DIRAC/Core/Security/IAMService.py +4 -3
  21. DIRAC/Core/Security/ProxyInfo.py +9 -5
  22. DIRAC/Core/Security/test/test_diracx_token_from_pem.py +161 -0
  23. DIRAC/Core/Tornado/Client/ClientSelector.py +4 -1
  24. DIRAC/Core/Tornado/Server/TornadoService.py +1 -1
  25. DIRAC/Core/Utilities/ClassAd/ClassAdLight.py +4 -290
  26. DIRAC/Core/Utilities/DErrno.py +5 -309
  27. DIRAC/Core/Utilities/Extensions.py +10 -1
  28. DIRAC/Core/Utilities/Graphs/GraphData.py +1 -1
  29. DIRAC/Core/Utilities/JDL.py +1 -195
  30. DIRAC/Core/Utilities/List.py +1 -124
  31. DIRAC/Core/Utilities/MySQL.py +101 -97
  32. DIRAC/Core/Utilities/Os.py +32 -1
  33. DIRAC/Core/Utilities/Platform.py +2 -107
  34. DIRAC/Core/Utilities/ReturnValues.py +7 -252
  35. DIRAC/Core/Utilities/StateMachine.py +12 -178
  36. DIRAC/Core/Utilities/TimeUtilities.py +10 -253
  37. DIRAC/Core/Utilities/test/Test_JDL.py +0 -3
  38. DIRAC/Core/Utilities/test/Test_Profiler.py +20 -20
  39. DIRAC/Core/scripts/dirac_agent.py +1 -1
  40. DIRAC/Core/scripts/dirac_apptainer_exec.py +16 -7
  41. DIRAC/Core/scripts/dirac_platform.py +1 -92
  42. DIRAC/DataManagementSystem/Agent/FTS3Agent.py +8 -7
  43. DIRAC/DataManagementSystem/Agent/RequestOperations/RemoveFile.py +7 -6
  44. DIRAC/DataManagementSystem/Client/FTS3Job.py +71 -34
  45. DIRAC/DataManagementSystem/DB/FTS3DB.py +3 -0
  46. DIRAC/DataManagementSystem/DB/FileCatalogComponents/DatasetManager/DatasetManager.py +1 -1
  47. DIRAC/DataManagementSystem/Utilities/DMSHelpers.py +6 -2
  48. DIRAC/DataManagementSystem/scripts/dirac_dms_create_moving_request.py +2 -0
  49. DIRAC/DataManagementSystem/scripts/dirac_dms_protocol_matrix.py +0 -1
  50. DIRAC/FrameworkSystem/Client/ComponentInstaller.py +4 -2
  51. DIRAC/FrameworkSystem/DB/ProxyDB.py +9 -5
  52. DIRAC/FrameworkSystem/Utilities/TokenManagementUtilities.py +3 -2
  53. DIRAC/FrameworkSystem/Utilities/diracx.py +2 -74
  54. DIRAC/FrameworkSystem/private/authorization/AuthServer.py +2 -2
  55. DIRAC/FrameworkSystem/scripts/dirac_login.py +2 -2
  56. DIRAC/FrameworkSystem/scripts/dirac_proxy_init.py +1 -1
  57. DIRAC/Interfaces/API/Dirac.py +27 -13
  58. DIRAC/Interfaces/API/DiracAdmin.py +42 -7
  59. DIRAC/Interfaces/API/Job.py +1 -0
  60. DIRAC/Interfaces/scripts/dirac_admin_allow_site.py +7 -1
  61. DIRAC/Interfaces/scripts/dirac_admin_ban_site.py +7 -1
  62. DIRAC/Interfaces/scripts/dirac_wms_job_parameters.py +0 -1
  63. DIRAC/MonitoringSystem/Client/Types/WMSHistory.py +4 -0
  64. DIRAC/MonitoringSystem/Client/WebAppClient.py +26 -0
  65. DIRAC/MonitoringSystem/ConfigTemplate.cfg +9 -0
  66. DIRAC/MonitoringSystem/DB/MonitoringDB.py +6 -25
  67. DIRAC/MonitoringSystem/Service/MonitoringHandler.py +0 -33
  68. DIRAC/MonitoringSystem/Service/WebAppHandler.py +599 -0
  69. DIRAC/MonitoringSystem/private/MainReporter.py +0 -3
  70. DIRAC/ProductionSystem/scripts/dirac_prod_get_trans.py +2 -3
  71. DIRAC/RequestManagementSystem/Agent/RequestExecutingAgent.py +8 -6
  72. DIRAC/RequestManagementSystem/ConfigTemplate.cfg +6 -6
  73. DIRAC/RequestManagementSystem/DB/test/RMSTestScenari.py +2 -0
  74. DIRAC/ResourceStatusSystem/Client/SiteStatus.py +4 -2
  75. DIRAC/ResourceStatusSystem/Command/FreeDiskSpaceCommand.py +3 -1
  76. DIRAC/ResourceStatusSystem/Utilities/CSHelpers.py +2 -31
  77. DIRAC/ResourceStatusSystem/scripts/dirac_rss_set_status.py +18 -4
  78. DIRAC/Resources/Catalog/RucioFileCatalogClient.py +1 -1
  79. DIRAC/Resources/Computing/AREXComputingElement.py +19 -3
  80. DIRAC/Resources/Computing/BatchSystems/Condor.py +126 -108
  81. DIRAC/Resources/Computing/BatchSystems/SLURM.py +5 -1
  82. DIRAC/Resources/Computing/BatchSystems/test/Test_SLURM.py +46 -0
  83. DIRAC/Resources/Computing/HTCondorCEComputingElement.py +37 -43
  84. DIRAC/Resources/Computing/SingularityComputingElement.py +6 -1
  85. DIRAC/Resources/Computing/test/Test_HTCondorCEComputingElement.py +67 -49
  86. DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +2 -1
  87. DIRAC/Resources/IdProvider/CheckInIdProvider.py +13 -0
  88. DIRAC/Resources/IdProvider/IdProviderFactory.py +11 -3
  89. DIRAC/Resources/Storage/StorageBase.py +4 -2
  90. DIRAC/Resources/Storage/StorageElement.py +4 -4
  91. DIRAC/TransformationSystem/Agent/TaskManagerAgentBase.py +10 -16
  92. DIRAC/TransformationSystem/Agent/TransformationAgent.py +22 -1
  93. DIRAC/TransformationSystem/Agent/TransformationCleaningAgent.py +15 -15
  94. DIRAC/TransformationSystem/Client/Transformation.py +2 -1
  95. DIRAC/TransformationSystem/Client/TransformationClient.py +0 -7
  96. DIRAC/TransformationSystem/Client/Utilities.py +9 -0
  97. DIRAC/TransformationSystem/Service/TransformationManagerHandler.py +0 -336
  98. DIRAC/TransformationSystem/Utilities/ReplicationCLIParameters.py +3 -3
  99. DIRAC/TransformationSystem/scripts/dirac_production_runjoblocal.py +2 -4
  100. DIRAC/TransformationSystem/test/Test_replicationTransformation.py +5 -6
  101. DIRAC/Workflow/Modules/test/Test_Modules.py +5 -0
  102. DIRAC/WorkloadManagementSystem/Agent/JobAgent.py +1 -5
  103. DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py +11 -7
  104. DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py +4 -3
  105. DIRAC/WorkloadManagementSystem/Agent/PushJobAgent.py +13 -13
  106. DIRAC/WorkloadManagementSystem/Agent/SiteDirector.py +10 -13
  107. DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py +18 -51
  108. DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py +41 -1
  109. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py +2 -0
  110. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobCleaningAgent.py +7 -9
  111. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py +1 -0
  112. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_SiteDirector.py +8 -2
  113. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py +4 -5
  114. DIRAC/WorkloadManagementSystem/Client/DownloadInputData.py +7 -5
  115. DIRAC/WorkloadManagementSystem/Client/JobMonitoringClient.py +10 -11
  116. DIRAC/WorkloadManagementSystem/Client/JobState/JobManifest.py +32 -261
  117. DIRAC/WorkloadManagementSystem/Client/JobStateUpdateClient.py +3 -0
  118. DIRAC/WorkloadManagementSystem/Client/JobStatus.py +8 -152
  119. DIRAC/WorkloadManagementSystem/Client/SandboxStoreClient.py +25 -38
  120. DIRAC/WorkloadManagementSystem/Client/WMSClient.py +2 -3
  121. DIRAC/WorkloadManagementSystem/Client/test/Test_Client_DownloadInputData.py +29 -0
  122. DIRAC/WorkloadManagementSystem/ConfigTemplate.cfg +4 -8
  123. DIRAC/WorkloadManagementSystem/DB/JobDB.py +40 -69
  124. DIRAC/WorkloadManagementSystem/DB/JobDBUtils.py +18 -147
  125. DIRAC/WorkloadManagementSystem/DB/JobParametersDB.py +9 -9
  126. DIRAC/WorkloadManagementSystem/DB/PilotAgentsDB.py +3 -2
  127. DIRAC/WorkloadManagementSystem/DB/SandboxMetadataDB.py +28 -39
  128. DIRAC/WorkloadManagementSystem/DB/StatusUtils.py +125 -0
  129. DIRAC/WorkloadManagementSystem/DB/tests/Test_JobDB.py +1 -1
  130. DIRAC/WorkloadManagementSystem/DB/tests/Test_StatusUtils.py +28 -0
  131. DIRAC/WorkloadManagementSystem/Executor/JobSanity.py +3 -3
  132. DIRAC/WorkloadManagementSystem/FutureClient/JobStateUpdateClient.py +2 -14
  133. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +14 -9
  134. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +36 -10
  135. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapperTemplate.py +4 -0
  136. DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py +33 -154
  137. DIRAC/WorkloadManagementSystem/Service/JobMonitoringHandler.py +5 -323
  138. DIRAC/WorkloadManagementSystem/Service/JobStateUpdateHandler.py +0 -16
  139. DIRAC/WorkloadManagementSystem/Service/PilotManagerHandler.py +6 -102
  140. DIRAC/WorkloadManagementSystem/Service/SandboxStoreHandler.py +5 -51
  141. DIRAC/WorkloadManagementSystem/Service/WMSAdministratorHandler.py +16 -79
  142. DIRAC/WorkloadManagementSystem/Utilities/JobModel.py +28 -199
  143. DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +65 -3
  144. DIRAC/WorkloadManagementSystem/Utilities/JobStatusUtility.py +2 -64
  145. DIRAC/WorkloadManagementSystem/Utilities/ParametricJob.py +7 -171
  146. DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py +73 -7
  147. DIRAC/WorkloadManagementSystem/Utilities/PilotWrapper.py +2 -0
  148. DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +16 -0
  149. DIRAC/WorkloadManagementSystem/Utilities/Utils.py +36 -1
  150. DIRAC/WorkloadManagementSystem/Utilities/jobAdministration.py +15 -0
  151. DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobModel.py +1 -5
  152. DIRAC/WorkloadManagementSystem/Utilities/test/Test_ParametricJob.py +45 -128
  153. DIRAC/WorkloadManagementSystem/Utilities/test/Test_PilotWrapper.py +16 -0
  154. DIRAC/__init__.py +55 -54
  155. {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/METADATA +6 -4
  156. {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/RECORD +160 -160
  157. {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/WHEEL +1 -1
  158. {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/entry_points.txt +0 -3
  159. DIRAC/Core/Utilities/test/Test_List.py +0 -150
  160. DIRAC/Core/Utilities/test/Test_Time.py +0 -88
  161. DIRAC/TransformationSystem/scripts/dirac_transformation_archive.py +0 -30
  162. DIRAC/TransformationSystem/scripts/dirac_transformation_clean.py +0 -30
  163. DIRAC/TransformationSystem/scripts/dirac_transformation_remove_output.py +0 -30
  164. DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobManager.py +0 -58
  165. {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/licenses/LICENSE +0 -0
  166. {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/top_level.txt +0 -0
@@ -12,16 +12,14 @@
12
12
  import hashlib
13
13
  import json
14
14
  import os
15
- from pathlib import Path
16
15
  import random
17
16
  import shutil
18
17
  import sys
19
- from collections import defaultdict
20
18
  import time
19
+ from collections import defaultdict
20
+ from pathlib import Path
21
21
 
22
- from diraccfg import CFG
23
-
24
- from DIRAC import gConfig, S_OK, S_ERROR
22
+ from DIRAC import S_ERROR, S_OK, gConfig
25
23
  from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
26
24
  from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getQueues
27
25
  from DIRAC.Core.Utilities import DErrno
@@ -30,6 +28,7 @@ from DIRAC.Core.Utilities.Proxy import executeWithUserProxy
30
28
  from DIRAC.Core.Utilities.Version import getVersion
31
29
  from DIRAC.FrameworkSystem.Client.ProxyManagerClient import gProxyManager
32
30
  from DIRAC.Resources.Computing import ComputingElement
31
+ from DIRAC.WorkloadManagementSystem.Agent.JobAgent import JobAgent
33
32
  from DIRAC.WorkloadManagementSystem.Client import JobMinorStatus, JobStatus, PilotStatus
34
33
  from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient
35
34
  from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport
@@ -41,11 +40,10 @@ from DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapperUtilities import (
41
40
  resolveInputData,
42
41
  transferInputSandbox,
43
42
  )
44
- from DIRAC.WorkloadManagementSystem.Utilities.QueueUtilities import getQueuesResolved
45
- from DIRAC.WorkloadManagementSystem.Agent.JobAgent import JobAgent
46
- from DIRAC.WorkloadManagementSystem.Utilities.Utils import createJobWrapper
47
43
  from DIRAC.WorkloadManagementSystem.private.ConfigHelper import findGenericPilotCredentials
44
+ from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters
48
45
  from DIRAC.WorkloadManagementSystem.Utilities.QueueUtilities import getQueuesResolved
46
+ from DIRAC.WorkloadManagementSystem.Utilities.Utils import createJobWrapper
49
47
 
50
48
  MAX_JOBS_MANAGED = 100
51
49
 
@@ -230,6 +228,12 @@ class PushJobAgent(JobAgent):
230
228
  return result
231
229
  pilotProxy = result["Value"]
232
230
 
231
+ # Dump the proxy to a file to get DiracX token (it's later used by DiracX)
232
+ result = gProxyManager.dumpProxyToFile(pilotProxy)
233
+ if not result["OK"]:
234
+ return result
235
+ os.environ["X509_USER_PROXY"] = result["Value"]
236
+
233
237
  for queueName, queueDictionary in queueDictItems:
234
238
  # Make sure there is no problem with the queue before trying to submit
235
239
  if not self._allowedToSubmit(queueName):
@@ -281,7 +285,6 @@ class PushJobAgent(JobAgent):
281
285
  jobGroup = matcherInfo["Group"]
282
286
  owner = matcherInfo["Owner"]
283
287
  ceDict = matcherInfo["CEDict"]
284
- matchTime = matcherInfo["matchTime"]
285
288
 
286
289
  optimizerParams = {}
287
290
  for key in matcherInfo:
@@ -309,9 +312,6 @@ class PushJobAgent(JobAgent):
309
312
  self.log.verbose("Job request successful: \n", jobRequest["Value"])
310
313
  self.log.info("Received", f"JobID={jobID}, JobType={jobType}, Owner={owner}, JobGroup={jobGroup}")
311
314
 
312
- self.jobs[jobID]["JobReport"].setJobParameter(
313
- par_name="MatcherServiceTime", par_value=str(matchTime), sendFlag=False
314
- )
315
315
  self.jobs[jobID]["JobReport"].setJobStatus(
316
316
  status=JobStatus.MATCHED, minorStatus="Job Received by Agent", sendFlag=False
317
317
  )
@@ -734,7 +734,7 @@ class PushJobAgent(JobAgent):
734
734
  return S_OK()
735
735
 
736
736
  # Get their parameters
737
- if not (result := self.jobMonitoring.getJobParameters(jobs, ["GridCE", "TaskID", "Stamp"]))["OK"]:
737
+ if not (result := getJobParameters(jobs, ["GridCE", "TaskID", "Stamp"]))["OK"]:
738
738
  self.log.error("Failed to get the list of taskIDs", result["Message"])
739
739
  return result
740
740
 
@@ -1,4 +1,4 @@
1
- """ The Site Director is an agent performing pilot job submission to particular sites/Computing Elements.
1
+ """The Site Director is an agent performing pilot job submission to particular sites/Computing Elements.
2
2
 
3
3
  .. literalinclude:: ../ConfigTemplate.cfg
4
4
  :start-after: ##BEGIN SiteDirector
@@ -7,6 +7,7 @@
7
7
  :caption: SiteDirector options
8
8
 
9
9
  """
10
+
10
11
  import datetime
11
12
  import os
12
13
  from collections import defaultdict
@@ -14,7 +15,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
14
15
  from typing import Any
15
16
 
16
17
  import DIRAC
17
- from DIRAC import S_ERROR, S_OK, gConfig
18
+ from DIRAC import S_ERROR, S_OK
18
19
  from DIRAC.AccountingSystem.Client.DataStoreClient import gDataStoreClient
19
20
  from DIRAC.AccountingSystem.Client.Types.Pilot import Pilot as PilotAccounting
20
21
  from DIRAC.AccountingSystem.Client.Types.PilotSubmission import (
@@ -147,10 +148,10 @@ class SiteDirector(AgentModule):
147
148
  self.sendSubmissionAccounting = True
148
149
 
149
150
  # Get the site description dictionary
150
- siteNames = self.am_getOption("Site", [])
151
- ceTypes = self.am_getOption("CETypes", [])
152
- ces = self.am_getOption("CEs", [])
153
- tags = self.am_getOption("Tags", [])
151
+ siteNames = self.am_getOption("Site")
152
+ ceTypes = self.am_getOption("CETypes")
153
+ ces = self.am_getOption("CEs")
154
+ tags = self.am_getOption("Tags")
154
155
 
155
156
  # Display options used
156
157
  self.log.always("VO:", self.vo)
@@ -229,12 +230,8 @@ class SiteDirector(AgentModule):
229
230
  site = self.queueDict[queueName]["Site"]
230
231
  ce = self.queueDict[queueName]["CEName"]
231
232
 
232
- # Check the status of the Site
233
- if site in siteMaskList:
234
- continue
235
-
236
- # Check the status of the CE (only for RSS=Active)
237
- if ce not in ceMaskList:
233
+ # Check the status of the Site and CE
234
+ if site in siteMaskList and ce in ceMaskList:
238
235
  continue
239
236
 
240
237
  self.log.warn("Queue not considered because not usable:", queueName)
@@ -580,7 +577,7 @@ class SiteDirector(AgentModule):
580
577
  pilotOptions = []
581
578
 
582
579
  pilotOptions = " ".join(pilotOptions)
583
- self.log.verbose(f"pilotOptions: {pilotOptions}")
580
+ self.log.verbose(f"{pilotOptions=}")
584
581
 
585
582
  # if a global workingDirectory is defined for the CEType (like HTCondor)
586
583
  # use it (otherwise the __cleanup done by HTCondor will be in the wrong folder !)
@@ -14,18 +14,18 @@ import datetime
14
14
  from DIRAC import S_ERROR, S_OK, gConfig
15
15
  from DIRAC.AccountingSystem.Client.Types.Job import Job
16
16
  from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath
17
- from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getDNForUsername
18
17
  from DIRAC.Core.Base.AgentModule import AgentModule
19
18
  from DIRAC.Core.Utilities import DErrno
20
19
  from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
21
20
  from DIRAC.Core.Utilities.TimeUtilities import fromString, second, toEpoch
22
21
  from DIRAC.WorkloadManagementSystem.Client import JobMinorStatus, JobStatus
23
- from DIRAC.WorkloadManagementSystem.Client.JobManagerClient import JobManagerClient
24
- from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient
25
- from DIRAC.WorkloadManagementSystem.Client.PilotManagerClient import PilotManagerClient
26
- from DIRAC.WorkloadManagementSystem.Client.WMSClient import WMSClient
27
22
  from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
28
23
  from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
24
+ from DIRAC.WorkloadManagementSystem.DB.PilotAgentsDB import PilotAgentsDB
25
+ from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_KILL
26
+ from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
27
+ from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters
28
+ from DIRAC.WorkloadManagementSystem.Utilities.Utils import rescheduleJobs
29
29
 
30
30
 
31
31
  class StalledJobAgent(AgentModule):
@@ -235,7 +235,7 @@ class StalledJobAgent(AgentModule):
235
235
  # Set the jobs Failed, send them a kill signal in case they are not really dead
236
236
  # and send accounting info
237
237
  if setFailed:
238
- res = self._sendKillCommand(jobID)
238
+ res = kill_delete_jobs(RIGHT_KILL, [jobID], nonauthJobList=[], force=True)
239
239
  if not res["OK"]:
240
240
  self.log.error("Failed to kill job", jobID)
241
241
 
@@ -254,15 +254,15 @@ class StalledJobAgent(AgentModule):
254
254
 
255
255
  def _getJobPilotStatus(self, jobID):
256
256
  """Get the job pilot status."""
257
- result = JobMonitoringClient().getJobParameter(jobID, "Pilot_Reference")
257
+ result = getJobParameters([jobID], "Pilot_Reference")
258
258
  if not result["OK"]:
259
259
  return result
260
- pilotReference = result["Value"].get("Pilot_Reference", "Unknown")
261
- if pilotReference == "Unknown":
260
+ pilotReference = result["Value"].get("Pilot_Reference")
261
+ if not pilotReference:
262
262
  # There is no pilot reference, hence its status is unknown
263
263
  return S_OK("NoPilot")
264
264
 
265
- result = PilotManagerClient().getPilotInfo(pilotReference)
265
+ result = PilotAgentsDB().getPilotInfo(pilotReference)
266
266
  if not result["OK"]:
267
267
  if DErrno.cmpError(result, DErrno.EWMSNOPILOT):
268
268
  self.log.warn("No pilot found", f"for job {jobID}: {result['Message']}")
@@ -389,11 +389,11 @@ class StalledJobAgent(AgentModule):
389
389
  if lastHeartBeatTime is not None and lastHeartBeatTime > endTime:
390
390
  endTime = lastHeartBeatTime
391
391
 
392
- result = JobMonitoringClient().getJobParameter(jobID, "CPUNormalizationFactor")
393
- if not result["OK"] or not result["Value"]:
392
+ result = getJobParameters([jobID], "CPUNormalizationFactor")
393
+ if not result["OK"] or not result["Value"] or not result["Value"].get("CPUNormalizationFactor"):
394
394
  self.log.error(
395
395
  "Error getting Job Parameter CPUNormalizationFactor, setting 0",
396
- result.get("Message", "No such value"),
396
+ result.get("Message"),
397
397
  )
398
398
  cpuNormalization = 0.0
399
399
  else:
@@ -518,8 +518,7 @@ class StalledJobAgent(AgentModule):
518
518
  return startTime, endTime
519
519
 
520
520
  def _kickStuckJobs(self):
521
- """Reschedule jobs stuck in initialization status Rescheduled,
522
- Matched."""
521
+ """Reschedule jobs stuck in initialization status Rescheduled, Matched."""
523
522
 
524
523
  message = ""
525
524
 
@@ -530,17 +529,12 @@ class StalledJobAgent(AgentModule):
530
529
  return result
531
530
 
532
531
  jobIDs = result["Value"]
533
- jobManagerClient = JobManagerClient()
534
532
  if jobIDs:
535
533
  self.log.info(f"Rescheduling {len(jobIDs)} jobs stuck in {JobStatus.MATCHED} status")
536
- result = jobManagerClient.rescheduleJob(jobIDs)
534
+ result = rescheduleJobs(jobIDs)
537
535
  if not result["OK"]:
538
536
  message = f"Failed to reschedule jobs stuck in {JobStatus.MATCHED} status"
539
537
  message += "\n" + result["Message"]
540
- if "InvalidJobIDs" in result:
541
- message += "\n" + "\tInvalid job IDs: " + str(result["InvalidJobIDs"])
542
- if "NonauthorizedJobIDs" in result:
543
- message += "\n" + "\tNon authorized job IDs: " + str(result["NonauthorizedJobIDs"])
544
538
 
545
539
  checkTime = datetime.datetime.utcnow() - self.rescheduledTime * second
546
540
  result = self.jobDB.selectJobs({"Status": JobStatus.RESCHEDULED}, older=checkTime)
@@ -550,18 +544,14 @@ class StalledJobAgent(AgentModule):
550
544
 
551
545
  jobIDs = result["Value"]
552
546
  if jobIDs:
553
- self.log.info(f"Rescheduling {len(jobIDs)} jobs stuck in Rescheduled status")
554
- result = jobManagerClient.rescheduleJob(jobIDs)
547
+ self.log.info(f"Rescheduling {len(jobIDs)} jobs stuck in {JobStatus.RESCHEDULED} status")
548
+ result = rescheduleJobs(jobIDs)
555
549
  if not result["OK"]:
556
550
  message = f"Failed to reschedule jobs stuck in {JobStatus.RESCHEDULED} status"
557
551
  message += "\n" + result["Message"]
558
- if "InvalidJobIDs" in result:
559
- message += "\n" + "\tInvalid job IDs: " + str(result["InvalidJobIDs"])
560
- if "NonauthorizedJobIDs" in result:
561
- message += "\n" + "\tNon authorized job IDs: " + str(result["NonauthorizedJobIDs"])
562
552
 
563
553
  if message:
564
- return S_ERROR(message)
554
+ self.log.error(message)
565
555
  return S_OK()
566
556
 
567
557
  def _failSubmittingJobs(self):
@@ -584,26 +574,3 @@ class StalledJobAgent(AgentModule):
584
574
  continue
585
575
 
586
576
  return S_OK()
587
-
588
- def _sendKillCommand(self, job):
589
- """Send a kill signal to the job such that it cannot continue running.
590
-
591
- :param int job: ID of job to send kill command
592
- """
593
-
594
- res = self.jobDB.getJobAttribute(job, "Owner")
595
- if not res["OK"]:
596
- return res
597
- owner = res["Value"]
598
-
599
- res = self.jobDB.getJobAttribute(job, "OwnerGroup")
600
- if not res["OK"]:
601
- return res
602
- ownerGroup = res["Value"]
603
-
604
- wmsClient = WMSClient(
605
- useCertificates=True,
606
- delegatedDN=getDNForUsername(owner)["Value"][0] if owner else None,
607
- delegatedGroup=ownerGroup,
608
- )
609
- return wmsClient.killJob(job)
@@ -9,10 +9,11 @@
9
9
  """
10
10
  import datetime
11
11
 
12
- from DIRAC import S_ERROR, S_OK
12
+ from DIRAC import S_ERROR, S_OK, gConfig
13
13
  from DIRAC.AccountingSystem.Client.DataStoreClient import DataStoreClient
14
14
  from DIRAC.AccountingSystem.Client.Types.WMSHistory import WMSHistory
15
15
  from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
16
+ from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getSites
16
17
  from DIRAC.Core.Base.AgentModule import AgentModule
17
18
  from DIRAC.Core.Utilities import TimeUtilities
18
19
  from DIRAC.MonitoringSystem.Client.MonitoringReporter import MonitoringReporter
@@ -77,6 +78,8 @@ class StatesAccountingAgent(AgentModule):
77
78
  def execute(self):
78
79
  """Main execution method"""
79
80
 
81
+ site_metadata = self._getSitesMetadata()
82
+
80
83
  # on the first iteration of the agent, do nothing in order to avoid double committing after a restart
81
84
  if self.am_getModuleParam("cyclesDone") == 0:
82
85
  self.log.notice("Skipping the first iteration of the agent")
@@ -131,6 +134,16 @@ class StatesAccountingAgent(AgentModule):
131
134
 
132
135
  for backend in self.datastores:
133
136
  if backend.lower() == "monitoring":
137
+ site_name = rD["Site"]
138
+ if site_name not in site_metadata:
139
+ self.log.warn(
140
+ f"Site {site_name} not found in site metadata, using default values",
141
+ )
142
+ rD["Tier"] = "4"
143
+ rD["Type"] = site_name.split(".")[0]
144
+ else:
145
+ rD["Tier"] = site_metadata[site_name]["Tier"]
146
+ rD["Type"] = site_metadata[site_name]["Type"]
134
147
  rD["timestamp"] = int(TimeUtilities.toEpochMilliSeconds(now))
135
148
  self.datastores["Monitoring"].addRecord(rD)
136
149
 
@@ -154,3 +167,30 @@ class StatesAccountingAgent(AgentModule):
154
167
  self.log.verbose(f"Done committing WMSHistory to {backend} backend")
155
168
 
156
169
  return S_OK()
170
+
171
+ def _getSitesMetadata(self):
172
+ """Get the metadata for the sites"""
173
+ res = getSites()
174
+ if not res["OK"]:
175
+ return res
176
+ sites = res["Value"]
177
+ site_metadata = {}
178
+
179
+ for site in sites:
180
+ site_metadata[site] = {}
181
+
182
+ # Get the site metadata from the Configuration System
183
+ grid = site.split(".")[0]
184
+ res = gConfig.getOptionsDict(f"Resources/Sites/{grid}/{site}")
185
+ if not res["OK"]:
186
+ self.log.error("Failure getting options dict for site", f"{site}: {res['Message']}")
187
+ continue
188
+ siteInfoCS = res["Value"]
189
+
190
+ # The site tier is normally 1 or 2. Few VOs may define tier 3.
191
+ # If the tier is not defined, we assume it is 4, with 4 meaning "not pledged" (opportunistic).
192
+ site_metadata[site]["Tier"] = siteInfoCS.get("MoUTierLevel", "4")
193
+ # The site type is defined by the first part of the site name.
194
+ # It needs to be interpreted at the Monitoring side (e.g. in Grafana).
195
+ site_metadata[site]["Type"] = site.split(".")[0]
196
+ return site_metadata
@@ -260,6 +260,7 @@ def test__checkMatcherInfo(mocker, matcherInfo, matcherParams, expectedResult):
260
260
  #############################################################################
261
261
 
262
262
 
263
+ @pytest.mark.slow
263
264
  @pytest.mark.parametrize(
264
265
  "mockGCReply, mockPMReply, expected",
265
266
  [
@@ -308,6 +309,7 @@ def test__setupProxy(mocker, mockGCReply, mockPMReply, expected):
308
309
  assert result["Message"] == expected["Message"]
309
310
 
310
311
 
312
+ @pytest.mark.slow
311
313
  @pytest.mark.parametrize(
312
314
  "mockGCReply, mockPMReply, expected",
313
315
  [
@@ -1,10 +1,11 @@
1
1
  """ Test class for Job Cleaning Agent
2
2
  """
3
- import pytest
4
3
  from unittest.mock import MagicMock
5
4
 
5
+ import pytest
6
+
6
7
  # DIRAC Components
7
- from DIRAC import gLogger, S_OK
8
+ from DIRAC import S_OK, gLogger
8
9
  from DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent import JobCleaningAgent
9
10
 
10
11
  gLogger.setLevel("DEBUG")
@@ -32,7 +33,6 @@ def jca(mocker):
32
33
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.JobDB.selectJobs", side_effect=mockReply)
33
34
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.JobDB.__init__", side_effect=mockNone)
34
35
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.ReqClient", return_value=mockNone)
35
- mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.JobMonitoringClient", return_value=mockJMC)
36
36
 
37
37
  jca = JobCleaningAgent()
38
38
  jca.log = gLogger
@@ -98,7 +98,7 @@ def test_deleteJobsByStatus(jca, conditions, mockReplyInput, expected):
98
98
  "inputs, params, expected",
99
99
  [
100
100
  ([], {"OK": True, "Value": {}}, {"OK": True, "Value": {"Failed": {}, "Successful": {}}}),
101
- (["a", "b"], {"OK": True, "Value": {}}, {"OK": True, "Value": {"Failed": {}, "Successful": {}}}),
101
+ (["123", "456"], {"OK": True, "Value": {}}, {"OK": True, "Value": {"Failed": {}, "Successful": {}}}),
102
102
  (
103
103
  [],
104
104
  {"OK": True, "Value": {1: {"OutputSandboxLFN": "/some/lfn/1.txt"}}},
@@ -113,11 +113,11 @@ def test_deleteJobsByStatus(jca, conditions, mockReplyInput, expected):
113
113
  {"OK": True, "Value": {"Failed": {}, "Successful": {1: "/some/lfn/1.txt", 2: "/some/other/lfn/2.txt"}}},
114
114
  ),
115
115
  (
116
- ["a", "b"],
116
+ ["123", "456"],
117
117
  {"OK": True, "Value": {1: {"OutputSandboxLFN": "/some/lfn/1.txt"}}},
118
118
  {"OK": True, "Value": {"Failed": {}, "Successful": {1: "/some/lfn/1.txt"}}},
119
119
  ),
120
- (["a", "b"], {"OK": False}, {"OK": False}),
120
+ (["123", "456"], {"OK": False}, {"OK": False}),
121
121
  ],
122
122
  )
123
123
  def test_deleteJobOversizedSandbox(mocker, inputs, params, expected):
@@ -127,10 +127,10 @@ def test_deleteJobOversizedSandbox(mocker, inputs, params, expected):
127
127
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.AgentModule.am_getOption", return_value=mockAM)
128
128
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.JobDB", return_value=mockNone)
129
129
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.ReqClient", return_value=mockNone)
130
- mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.JobMonitoringClient", return_value=mockJMC)
131
130
  mocker.patch(
132
131
  "DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.getDNForUsername", return_value=S_OK(["/bih/boh/DN"])
133
132
  )
133
+ mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobCleaningAgent.getJobParameters", return_value=params)
134
134
 
135
135
  jobCleaningAgent = JobCleaningAgent()
136
136
  jobCleaningAgent.log = gLogger
@@ -138,8 +138,6 @@ def test_deleteJobOversizedSandbox(mocker, inputs, params, expected):
138
138
  jobCleaningAgent._AgentModule__configDefaults = mockAM
139
139
  jobCleaningAgent.initialize()
140
140
 
141
- mockJMC.getJobParameters.return_value = params
142
-
143
141
  result = jobCleaningAgent.deleteJobOversizedSandbox(inputs)
144
142
 
145
143
  assert result == expected
@@ -178,6 +178,7 @@ def jobID():
178
178
  shutil.rmtree(jobID)
179
179
 
180
180
 
181
+ @pytest.mark.slow
181
182
  def test_submitJobWrapper(mocker, jobID):
182
183
  """Test JobAgent._submitJobWrapper()"""
183
184
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobAgent.AgentModule.__init__")
@@ -169,10 +169,16 @@ def sd(mocker, config):
169
169
  gConfig.getSections("Resources/Sites/LCG")["Value"] + gConfig.getSections("Resources/Sites/DIRAC")["Value"]
170
170
  )
171
171
  mocker.patch(
172
- "DIRAC.WorkloadManagementSystem.Agent.SiteDirector.SiteStatus.getUsableSites", return_values=usableSites
172
+ "DIRAC.WorkloadManagementSystem.Agent.SiteDirector.SiteStatus.getUsableSites", return_value=S_OK(usableSites)
173
173
  )
174
+
175
+ # Mock getElementStatus to return a properly formatted dictionary
176
+ def mock_getElementStatus(ceNamesList, *args, **kwargs):
177
+ return S_OK({ceName: {"all": "Active"} for ceName in ceNamesList})
178
+
174
179
  mocker.patch(
175
- "DIRAC.WorkloadManagementSystem.Agent.SiteDirector.ResourceStatus.getElementStatus", return_values=usableSites
180
+ "DIRAC.WorkloadManagementSystem.Agent.SiteDirector.ResourceStatus.getElementStatus",
181
+ side_effect=mock_getElementStatus,
176
182
  )
177
183
  mocker.patch(
178
184
  "DIRAC.WorkloadManagementSystem.Agent.SiteDirector.gProxyManager.downloadProxy", side_effect=mockPMProxyReply
@@ -25,11 +25,10 @@ def sja(mocker):
25
25
  )
26
26
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.JobDB")
27
27
  mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.JobLoggingDB")
28
- mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.JobMonitoringClient", return_value=MagicMock())
29
- mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.JobManagerClient", return_value=MagicMock())
30
- mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.PilotManagerClient", return_value=MagicMock())
31
- mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.WMSClient", return_value=MagicMock())
32
- mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.getDNForUsername", return_value=MagicMock())
28
+ mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.rescheduleJobs", return_value=MagicMock())
29
+ mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.PilotAgentsDB", return_value=MagicMock())
30
+ mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.getJobParameters", return_value=MagicMock())
31
+ mocker.patch("DIRAC.WorkloadManagementSystem.Agent.StalledJobAgent.kill_delete_jobs", return_value=MagicMock())
33
32
 
34
33
  stalledJobAgent = StalledJobAgent()
35
34
  stalledJobAgent._AgentModule__configDefaults = mockAM
@@ -1,6 +1,7 @@
1
- """ The Download Input Data module wraps around the Replica Management
2
- components to provide access to datasets by downloading locally
1
+ """The Download Input Data module wraps around the Replica Management
2
+ components to provide access to datasets by downloading locally
3
3
  """
4
+
4
5
  import os
5
6
  import random
6
7
  import tempfile
@@ -226,7 +227,7 @@ class DownloadInputData:
226
227
  diskSpace = getDiskSpace(self.__getDownloadDir(False)) # MB
227
228
  availableBytes = diskSpace * 1024 * 1024 # bytes
228
229
  bufferGBs = gConfig.getValue(
229
- os.path.join("/Systems/WorkloadManagement/JobWrapper", "JobWrapper", "MinOutputDataBufferGB"), 5.0
230
+ os.path.join("/Systems/WorkloadManagement/JobWrapper", "MinOutputDataBufferGB"), 5.0
230
231
  )
231
232
  data = bufferGBs * 1024 * 1024 * 1024 # bufferGBs in bytes
232
233
  if (data + totalSize) < availableBytes:
@@ -243,12 +244,13 @@ class DownloadInputData:
243
244
  return S_ERROR(msg)
244
245
 
245
246
  def __getDownloadDir(self, incrementCounter=True):
247
+ jobIDPath = str(self.configuration.get("JobIDPath", os.getcwd()))
246
248
  if self.inputDataDirectory == "PerFile":
247
249
  if incrementCounter:
248
250
  self.counter += 1
249
- return tempfile.mkdtemp(prefix=f"InputData_{self.counter}", dir=os.getcwd())
251
+ return tempfile.mkdtemp(prefix=f"InputData_{self.counter}", dir=jobIDPath)
250
252
  elif self.inputDataDirectory == "CWD":
251
- return os.getcwd()
253
+ return jobIDPath
252
254
  else:
253
255
  return self.inputDataDirectory
254
256
 
@@ -14,12 +14,14 @@ except ImportError:
14
14
 
15
15
  @createClient("WorkloadManagement/JobMonitoring")
16
16
  class JobMonitoringClient(Client):
17
+ # Set to None to raise an error if this service is set as "legacy adapted"
18
+ # See ClientSelector
19
+ diracxClient = None
20
+
17
21
  def __init__(self, **kwargs):
18
22
  super().__init__(**kwargs)
19
23
  self.setServer("WorkloadManagement/JobMonitoring")
20
24
 
21
- diracxClient = futureJobMonitoringClient
22
-
23
25
  @ignoreEncodeWarning
24
26
  def getJobsStatus(self, jobIDs):
25
27
  res = self._getRPC().getJobsStatus(jobIDs)
@@ -38,15 +40,6 @@ class JobMonitoringClient(Client):
38
40
  res["Value"] = strToIntDict(res["Value"])
39
41
  return res
40
42
 
41
- @ignoreEncodeWarning
42
- def getJobsParameters(self, jobIDs, parameters):
43
- res = self._getRPC().getJobsParameters(jobIDs, parameters)
44
-
45
- # Cast the str keys to int
46
- if res["OK"]:
47
- res["Value"] = strToIntDict(res["Value"])
48
- return res
49
-
50
43
  @ignoreEncodeWarning
51
44
  def getJobsMinorStatus(self, jobIDs):
52
45
  res = self._getRPC().getJobsMinorStatus(jobIDs)
@@ -79,3 +72,9 @@ class JobMonitoringClient(Client):
79
72
  if res["OK"]:
80
73
  res["Value"] = strToIntDict(res["Value"])
81
74
  return res
75
+
76
+ def getInputData(self, jobIDs):
77
+ res = self._getRPC().getInputData(jobIDs)
78
+ if res["OK"] and isinstance(res["Value"], dict):
79
+ res["Value"] = strToIntDict(res["Value"])
80
+ return res