DIRAC 9.0.0a42__py3-none-any.whl → 9.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. DIRAC/AccountingSystem/Client/AccountingCLI.py +0 -140
  2. DIRAC/AccountingSystem/Client/DataStoreClient.py +0 -13
  3. DIRAC/AccountingSystem/Client/Types/BaseAccountingType.py +0 -7
  4. DIRAC/AccountingSystem/ConfigTemplate.cfg +0 -5
  5. DIRAC/AccountingSystem/Service/DataStoreHandler.py +0 -72
  6. DIRAC/ConfigurationSystem/Client/Helpers/CSGlobals.py +0 -9
  7. DIRAC/ConfigurationSystem/Client/Helpers/Registry.py +38 -26
  8. DIRAC/ConfigurationSystem/Client/Helpers/Resources.py +11 -43
  9. DIRAC/ConfigurationSystem/Client/Helpers/test/Test_Helpers.py +0 -16
  10. DIRAC/ConfigurationSystem/Client/LocalConfiguration.py +14 -8
  11. DIRAC/ConfigurationSystem/Client/PathFinder.py +47 -8
  12. DIRAC/ConfigurationSystem/Client/SyncPlugins/CERNLDAPSyncPlugin.py +4 -1
  13. DIRAC/ConfigurationSystem/Client/VOMS2CSSynchronizer.py +32 -19
  14. DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py +41 -1
  15. DIRAC/ConfigurationSystem/private/RefresherBase.py +4 -2
  16. DIRAC/Core/Base/API.py +4 -7
  17. DIRAC/Core/Base/SQLAlchemyDB.py +1 -0
  18. DIRAC/Core/DISET/ServiceReactor.py +11 -3
  19. DIRAC/Core/DISET/private/BaseClient.py +1 -2
  20. DIRAC/Core/DISET/private/Transports/M2SSLTransport.py +9 -7
  21. DIRAC/Core/DISET/private/Transports/SSL/M2Utils.py +3 -1
  22. DIRAC/Core/LCG/GOCDBClient.py +5 -7
  23. DIRAC/Core/Security/DiracX.py +31 -17
  24. DIRAC/Core/Security/IAMService.py +5 -10
  25. DIRAC/Core/Security/Locations.py +27 -18
  26. DIRAC/Core/Security/ProxyInfo.py +9 -5
  27. DIRAC/Core/Security/VOMSService.py +2 -4
  28. DIRAC/Core/Security/m2crypto/X509Certificate.py +4 -6
  29. DIRAC/Core/Security/m2crypto/asn1_utils.py +17 -5
  30. DIRAC/Core/Security/test/test_diracx_token_from_pem.py +161 -0
  31. DIRAC/Core/Tornado/Client/ClientSelector.py +4 -1
  32. DIRAC/Core/Tornado/Server/TornadoService.py +1 -1
  33. DIRAC/Core/Utilities/CGroups2.py +328 -0
  34. DIRAC/Core/Utilities/ClassAd/ClassAdLight.py +4 -290
  35. DIRAC/Core/Utilities/DErrno.py +5 -309
  36. DIRAC/Core/Utilities/Extensions.py +10 -1
  37. DIRAC/Core/Utilities/File.py +1 -1
  38. DIRAC/Core/Utilities/Graphs/GraphData.py +1 -1
  39. DIRAC/Core/Utilities/Graphs/GraphUtilities.py +6 -1
  40. DIRAC/Core/Utilities/JDL.py +1 -195
  41. DIRAC/Core/Utilities/List.py +1 -124
  42. DIRAC/Core/Utilities/MySQL.py +103 -99
  43. DIRAC/Core/Utilities/Os.py +32 -1
  44. DIRAC/Core/Utilities/Platform.py +2 -107
  45. DIRAC/Core/Utilities/Proxy.py +0 -4
  46. DIRAC/Core/Utilities/ReturnValues.py +7 -252
  47. DIRAC/Core/Utilities/StateMachine.py +12 -178
  48. DIRAC/Core/Utilities/Subprocess.py +35 -14
  49. DIRAC/Core/Utilities/TimeUtilities.py +10 -253
  50. DIRAC/Core/Utilities/test/Test_JDL.py +0 -3
  51. DIRAC/Core/Utilities/test/Test_Profiler.py +20 -20
  52. DIRAC/Core/scripts/dirac_agent.py +1 -1
  53. DIRAC/Core/scripts/dirac_apptainer_exec.py +72 -46
  54. DIRAC/Core/scripts/dirac_configure.py +1 -3
  55. DIRAC/Core/scripts/dirac_install_db.py +24 -6
  56. DIRAC/Core/scripts/dirac_platform.py +1 -92
  57. DIRAC/DataManagementSystem/Agent/FTS3Agent.py +8 -7
  58. DIRAC/DataManagementSystem/Agent/RequestOperations/RemoveFile.py +7 -6
  59. DIRAC/DataManagementSystem/Client/FTS3Job.py +71 -34
  60. DIRAC/DataManagementSystem/DB/FTS3DB.py +7 -3
  61. DIRAC/DataManagementSystem/DB/FileCatalogComponents/DatasetManager/DatasetManager.py +1 -1
  62. DIRAC/DataManagementSystem/DB/FileCatalogDB.sql +9 -9
  63. DIRAC/DataManagementSystem/DB/FileCatalogWithFkAndPsDB.sql +9 -9
  64. DIRAC/DataManagementSystem/Utilities/DMSHelpers.py +6 -2
  65. DIRAC/DataManagementSystem/scripts/dirac_admin_allow_se.py +13 -8
  66. DIRAC/DataManagementSystem/scripts/dirac_admin_ban_se.py +13 -8
  67. DIRAC/DataManagementSystem/scripts/dirac_dms_create_moving_request.py +2 -0
  68. DIRAC/DataManagementSystem/scripts/dirac_dms_protocol_matrix.py +0 -1
  69. DIRAC/FrameworkSystem/Client/BundleDeliveryClient.py +2 -7
  70. DIRAC/FrameworkSystem/Client/ComponentInstaller.py +9 -4
  71. DIRAC/FrameworkSystem/Client/ProxyManagerClient.py +5 -2
  72. DIRAC/FrameworkSystem/Client/SystemAdministratorClientCLI.py +11 -6
  73. DIRAC/FrameworkSystem/ConfigTemplate.cfg +2 -0
  74. DIRAC/FrameworkSystem/DB/AuthDB.py +3 -3
  75. DIRAC/FrameworkSystem/DB/InstalledComponentsDB.py +4 -4
  76. DIRAC/FrameworkSystem/DB/ProxyDB.py +11 -3
  77. DIRAC/FrameworkSystem/DB/TokenDB.py +1 -1
  78. DIRAC/FrameworkSystem/Service/ProxyManagerHandler.py +8 -6
  79. DIRAC/FrameworkSystem/Utilities/MonitoringUtilities.py +2 -19
  80. DIRAC/FrameworkSystem/Utilities/TokenManagementUtilities.py +3 -2
  81. DIRAC/FrameworkSystem/Utilities/diracx.py +36 -14
  82. DIRAC/FrameworkSystem/private/authorization/AuthServer.py +2 -2
  83. DIRAC/FrameworkSystem/scripts/dirac_admin_update_pilot.py +18 -11
  84. DIRAC/FrameworkSystem/scripts/dirac_login.py +2 -2
  85. DIRAC/FrameworkSystem/scripts/dirac_proxy_init.py +7 -8
  86. DIRAC/Interfaces/API/Dirac.py +27 -15
  87. DIRAC/Interfaces/API/DiracAdmin.py +45 -17
  88. DIRAC/Interfaces/API/Job.py +9 -13
  89. DIRAC/Interfaces/scripts/dirac_admin_allow_site.py +12 -18
  90. DIRAC/Interfaces/scripts/dirac_admin_ban_site.py +12 -10
  91. DIRAC/Interfaces/scripts/dirac_admin_get_site_mask.py +4 -13
  92. DIRAC/Interfaces/scripts/dirac_admin_reset_job.py +3 -6
  93. DIRAC/Interfaces/scripts/dirac_wms_job_parameters.py +0 -1
  94. DIRAC/MonitoringSystem/Client/Types/WMSHistory.py +4 -0
  95. DIRAC/MonitoringSystem/Client/WebAppClient.py +26 -0
  96. DIRAC/MonitoringSystem/ConfigTemplate.cfg +9 -0
  97. DIRAC/MonitoringSystem/DB/MonitoringDB.py +6 -25
  98. DIRAC/MonitoringSystem/Service/MonitoringHandler.py +0 -33
  99. DIRAC/MonitoringSystem/Service/WebAppHandler.py +599 -0
  100. DIRAC/MonitoringSystem/private/MainReporter.py +0 -3
  101. DIRAC/ProductionSystem/DB/ProductionDB.sql +4 -4
  102. DIRAC/ProductionSystem/scripts/dirac_prod_get.py +2 -2
  103. DIRAC/ProductionSystem/scripts/dirac_prod_get_all.py +2 -2
  104. DIRAC/ProductionSystem/scripts/dirac_prod_get_trans.py +2 -3
  105. DIRAC/RequestManagementSystem/Agent/RequestExecutingAgent.py +8 -6
  106. DIRAC/RequestManagementSystem/Agent/RequestOperations/ForwardDISET.py +2 -14
  107. DIRAC/RequestManagementSystem/Client/ReqClient.py +66 -13
  108. DIRAC/RequestManagementSystem/ConfigTemplate.cfg +6 -6
  109. DIRAC/RequestManagementSystem/DB/RequestDB.py +10 -5
  110. DIRAC/RequestManagementSystem/DB/test/RMSTestScenari.py +2 -0
  111. DIRAC/RequestManagementSystem/private/RequestValidator.py +40 -46
  112. DIRAC/ResourceStatusSystem/Client/SiteStatus.py +4 -2
  113. DIRAC/ResourceStatusSystem/Command/FreeDiskSpaceCommand.py +3 -1
  114. DIRAC/ResourceStatusSystem/DB/ResourceManagementDB.py +8 -8
  115. DIRAC/ResourceStatusSystem/DB/ResourceStatusDB.py +2 -2
  116. DIRAC/ResourceStatusSystem/Utilities/CSHelpers.py +2 -31
  117. DIRAC/ResourceStatusSystem/scripts/dirac_rss_set_status.py +30 -12
  118. DIRAC/Resources/Catalog/RucioFileCatalogClient.py +195 -1
  119. DIRAC/Resources/Catalog/test/Test_RucioFileCatalogClient.py +181 -0
  120. DIRAC/Resources/Computing/AREXComputingElement.py +25 -8
  121. DIRAC/Resources/Computing/BatchSystems/Condor.py +126 -108
  122. DIRAC/Resources/Computing/BatchSystems/SLURM.py +5 -1
  123. DIRAC/Resources/Computing/BatchSystems/test/Test_SLURM.py +46 -0
  124. DIRAC/Resources/Computing/ComputingElement.py +1 -1
  125. DIRAC/Resources/Computing/HTCondorCEComputingElement.py +44 -44
  126. DIRAC/Resources/Computing/InProcessComputingElement.py +4 -2
  127. DIRAC/Resources/Computing/LocalComputingElement.py +1 -18
  128. DIRAC/Resources/Computing/SSHBatchComputingElement.py +1 -17
  129. DIRAC/Resources/Computing/SSHComputingElement.py +1 -18
  130. DIRAC/Resources/Computing/SingularityComputingElement.py +19 -5
  131. DIRAC/Resources/Computing/test/Test_HTCondorCEComputingElement.py +67 -49
  132. DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +2 -1
  133. DIRAC/Resources/IdProvider/CheckInIdProvider.py +13 -0
  134. DIRAC/Resources/IdProvider/IdProviderFactory.py +11 -3
  135. DIRAC/Resources/MessageQueue/StompMQConnector.py +1 -1
  136. DIRAC/Resources/Storage/GFAL2_StorageBase.py +24 -15
  137. DIRAC/Resources/Storage/OccupancyPlugins/WLCGAccountingHTTPJson.py +1 -3
  138. DIRAC/Resources/Storage/StorageBase.py +4 -2
  139. DIRAC/Resources/Storage/StorageElement.py +6 -7
  140. DIRAC/StorageManagementSystem/DB/StorageManagementDB.sql +2 -2
  141. DIRAC/TransformationSystem/Agent/TaskManagerAgentBase.py +10 -16
  142. DIRAC/TransformationSystem/Agent/TransformationAgent.py +22 -1
  143. DIRAC/TransformationSystem/Agent/TransformationCleaningAgent.py +16 -16
  144. DIRAC/TransformationSystem/Client/TaskManager.py +2 -4
  145. DIRAC/TransformationSystem/Client/Transformation.py +6 -7
  146. DIRAC/TransformationSystem/Client/TransformationClient.py +21 -11
  147. DIRAC/TransformationSystem/Client/Utilities.py +9 -0
  148. DIRAC/TransformationSystem/DB/TransformationDB.py +11 -14
  149. DIRAC/TransformationSystem/DB/TransformationDB.sql +9 -9
  150. DIRAC/TransformationSystem/Service/TransformationManagerHandler.py +0 -333
  151. DIRAC/TransformationSystem/Utilities/ReplicationCLIParameters.py +3 -3
  152. DIRAC/TransformationSystem/Utilities/TransformationInfo.py +7 -5
  153. DIRAC/TransformationSystem/scripts/dirac_production_runjoblocal.py +2 -4
  154. DIRAC/TransformationSystem/test/Test_TransformationInfo.py +22 -15
  155. DIRAC/TransformationSystem/test/Test_replicationTransformation.py +5 -6
  156. DIRAC/Workflow/Modules/test/Test_Modules.py +5 -0
  157. DIRAC/WorkloadManagementSystem/Agent/JobAgent.py +38 -26
  158. DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py +12 -8
  159. DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py +4 -3
  160. DIRAC/WorkloadManagementSystem/Agent/PushJobAgent.py +13 -13
  161. DIRAC/WorkloadManagementSystem/Agent/SiteDirector.py +18 -14
  162. DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py +18 -51
  163. DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py +41 -1
  164. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py +45 -4
  165. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobCleaningAgent.py +7 -9
  166. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py +1 -0
  167. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_SiteDirector.py +9 -2
  168. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py +4 -5
  169. DIRAC/WorkloadManagementSystem/Client/DownloadInputData.py +9 -9
  170. DIRAC/WorkloadManagementSystem/Client/InputDataResolution.py +6 -6
  171. DIRAC/WorkloadManagementSystem/Client/JobMonitoringClient.py +10 -11
  172. DIRAC/WorkloadManagementSystem/Client/JobReport.py +1 -1
  173. DIRAC/WorkloadManagementSystem/Client/JobState/CachedJobState.py +3 -0
  174. DIRAC/WorkloadManagementSystem/Client/JobState/JobManifest.py +32 -261
  175. DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py +6 -0
  176. DIRAC/WorkloadManagementSystem/Client/JobStateUpdateClient.py +3 -0
  177. DIRAC/WorkloadManagementSystem/Client/JobStatus.py +8 -152
  178. DIRAC/WorkloadManagementSystem/Client/PoolXMLSlice.py +12 -19
  179. DIRAC/WorkloadManagementSystem/Client/SandboxStoreClient.py +25 -38
  180. DIRAC/WorkloadManagementSystem/Client/WMSClient.py +2 -3
  181. DIRAC/WorkloadManagementSystem/Client/test/Test_Client_DownloadInputData.py +29 -0
  182. DIRAC/WorkloadManagementSystem/ConfigTemplate.cfg +4 -8
  183. DIRAC/WorkloadManagementSystem/DB/JobDB.py +89 -132
  184. DIRAC/WorkloadManagementSystem/DB/JobDB.sql +8 -8
  185. DIRAC/WorkloadManagementSystem/DB/JobDBUtils.py +18 -147
  186. DIRAC/WorkloadManagementSystem/DB/JobLoggingDB.py +19 -6
  187. DIRAC/WorkloadManagementSystem/DB/JobParametersDB.py +9 -9
  188. DIRAC/WorkloadManagementSystem/DB/PilotAgentsDB.py +16 -5
  189. DIRAC/WorkloadManagementSystem/DB/PilotAgentsDB.sql +3 -3
  190. DIRAC/WorkloadManagementSystem/DB/SandboxMetadataDB.py +44 -82
  191. DIRAC/WorkloadManagementSystem/DB/StatusUtils.py +125 -0
  192. DIRAC/WorkloadManagementSystem/DB/tests/Test_JobDB.py +1 -1
  193. DIRAC/WorkloadManagementSystem/DB/tests/Test_StatusUtils.py +28 -0
  194. DIRAC/WorkloadManagementSystem/Executor/JobSanity.py +5 -4
  195. DIRAC/WorkloadManagementSystem/Executor/JobScheduling.py +4 -0
  196. DIRAC/WorkloadManagementSystem/FutureClient/JobStateUpdateClient.py +75 -33
  197. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +22 -11
  198. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py +9 -10
  199. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +60 -10
  200. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapperTemplate.py +4 -0
  201. DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py +33 -154
  202. DIRAC/WorkloadManagementSystem/Service/JobMonitoringHandler.py +5 -323
  203. DIRAC/WorkloadManagementSystem/Service/JobStateUpdateHandler.py +0 -16
  204. DIRAC/WorkloadManagementSystem/Service/PilotManagerHandler.py +6 -103
  205. DIRAC/WorkloadManagementSystem/Service/SandboxStoreHandler.py +7 -53
  206. DIRAC/WorkloadManagementSystem/Service/WMSAdministratorHandler.py +16 -79
  207. DIRAC/WorkloadManagementSystem/Service/WMSUtilities.py +4 -18
  208. DIRAC/WorkloadManagementSystem/Utilities/JobModel.py +28 -209
  209. DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +65 -3
  210. DIRAC/WorkloadManagementSystem/Utilities/JobStatusUtility.py +2 -64
  211. DIRAC/WorkloadManagementSystem/Utilities/ParametricJob.py +7 -171
  212. DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py +73 -7
  213. DIRAC/WorkloadManagementSystem/Utilities/PilotWrapper.py +41 -11
  214. DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +16 -0
  215. DIRAC/WorkloadManagementSystem/Utilities/Utils.py +36 -1
  216. DIRAC/WorkloadManagementSystem/Utilities/jobAdministration.py +15 -0
  217. DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobModel.py +1 -15
  218. DIRAC/WorkloadManagementSystem/Utilities/test/Test_ParametricJob.py +45 -128
  219. DIRAC/WorkloadManagementSystem/Utilities/test/Test_PilotWrapper.py +16 -0
  220. DIRAC/WorkloadManagementSystem/scripts/dirac_jobexec.py +7 -2
  221. DIRAC/WorkloadManagementSystem/scripts/dirac_wms_pilot_job_info.py +1 -1
  222. DIRAC/__init__.py +62 -60
  223. DIRAC/tests/Utilities/testJobDefinitions.py +22 -28
  224. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/METADATA +8 -5
  225. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/RECORD +229 -228
  226. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/WHEEL +1 -1
  227. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/entry_points.txt +0 -3
  228. DIRAC/Core/Utilities/test/Test_List.py +0 -150
  229. DIRAC/Core/Utilities/test/Test_Time.py +0 -88
  230. DIRAC/Resources/Computing/PilotBundle.py +0 -70
  231. DIRAC/TransformationSystem/scripts/dirac_transformation_archive.py +0 -30
  232. DIRAC/TransformationSystem/scripts/dirac_transformation_clean.py +0 -30
  233. DIRAC/TransformationSystem/scripts/dirac_transformation_remove_output.py +0 -30
  234. DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobManager.py +0 -58
  235. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info/licenses}/LICENSE +0 -0
  236. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/top_level.txt +0 -0
@@ -5,34 +5,36 @@
5
5
  and the current resource status that is used for matching.
6
6
  """
7
7
  import os
8
- import sys
9
8
  import re
9
+ import sys
10
10
  import time
11
+ from pathlib import Path
11
12
 
12
13
  from diraccfg import CFG
13
14
 
14
- from DIRAC import S_OK, S_ERROR, gConfig, rootPath, siteName
15
+ from DIRAC import S_ERROR, S_OK, gConfig, rootPath, siteName
15
16
  from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getDNForUsername
16
- from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
17
17
  from DIRAC.Core.Base.AgentModule import AgentModule
18
- from DIRAC.Core.Security.ProxyInfo import getProxyInfo
19
18
  from DIRAC.Core.Security import Properties
19
+ from DIRAC.Core.Security.ProxyFile import writeChainToTemporaryFile
20
+ from DIRAC.Core.Security.ProxyInfo import getProxyInfo
20
21
  from DIRAC.Core.Utilities import DErrno
22
+ from DIRAC.Core.Utilities.CGroups2 import CG2Manager
23
+ from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
21
24
  from DIRAC.Core.Utilities.ObjectLoader import ObjectLoader
22
25
  from DIRAC.FrameworkSystem.Client.ProxyManagerClient import gProxyManager
23
- from DIRAC.Resources.Computing.BatchSystems.TimeLeft.TimeLeft import TimeLeft
24
- from DIRAC.Resources.Computing.ComputingElementFactory import ComputingElementFactory
25
- from DIRAC.RequestManagementSystem.Client.Request import Request
26
26
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
27
+ from DIRAC.RequestManagementSystem.Client.Request import Request
27
28
  from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator
28
- from DIRAC.WorkloadManagementSystem.Client.MatcherClient import MatcherClient
29
- from DIRAC.WorkloadManagementSystem.Client.PilotManagerClient import PilotManagerClient
29
+ from DIRAC.Resources.Computing.BatchSystems.TimeLeft.TimeLeft import TimeLeft
30
+ from DIRAC.Resources.Computing.ComputingElementFactory import ComputingElementFactory
31
+ from DIRAC.WorkloadManagementSystem.Client import JobStatus, PilotStatus
30
32
  from DIRAC.WorkloadManagementSystem.Client.JobManagerClient import JobManagerClient
31
33
  from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient
32
34
  from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport
33
- from DIRAC.WorkloadManagementSystem.Client import JobStatus
35
+ from DIRAC.WorkloadManagementSystem.Client.MatcherClient import MatcherClient
36
+ from DIRAC.WorkloadManagementSystem.Client.PilotManagerClient import PilotManagerClient
34
37
  from DIRAC.WorkloadManagementSystem.Utilities.Utils import createJobWrapper
35
- from DIRAC.WorkloadManagementSystem.Client import PilotStatus
36
38
 
37
39
 
38
40
  class JobAgent(AgentModule):
@@ -134,6 +136,14 @@ class JobAgent(AgentModule):
134
136
 
135
137
  # Utilities
136
138
  self.timeLeftUtil = TimeLeft()
139
+
140
+ # Some innerCEs may want to make use of CGroup2 support, so we prepare it globally here
141
+ res = CG2Manager().setUp()
142
+ if res["OK"]:
143
+ self.log.info("CGroup2 support configured successfully.")
144
+ else:
145
+ self.log.info("CGroup2 support unavailable:", res["Message"])
146
+
137
147
  return S_OK()
138
148
 
139
149
  def _initializeComputingElement(self, localCE):
@@ -226,7 +236,6 @@ class JobAgent(AgentModule):
226
236
  jobGroup = matcherInfo["Group"]
227
237
  owner = matcherInfo["Owner"]
228
238
  ceDict = matcherInfo["CEDict"]
229
- matchTime = matcherInfo["matchTime"]
230
239
 
231
240
  optimizerParams = {}
232
241
  for key in matcherInfo:
@@ -253,9 +262,6 @@ class JobAgent(AgentModule):
253
262
  self.log.verbose("Job request successful: \n", jobRequest["Value"])
254
263
  self.log.info("Received", f"JobID={jobID}, JobType={jobType}, Owner={owner}, JobGroup={jobGroup}")
255
264
  self.jobCount += 1
256
- self.jobs[jobID]["JobReport"].setJobParameter(
257
- par_name="MatcherServiceTime", par_value=str(matchTime), sendFlag=False
258
- )
259
265
 
260
266
  self.jobs[jobID]["JobReport"].setJobStatus(minorStatus="Job Received by Agent", sendFlag=False)
261
267
  result_setupProxy = self._setupProxy(owner, jobGroup)
@@ -476,8 +482,6 @@ class JobAgent(AgentModule):
476
482
 
477
483
  proxyChain = ret["Value"]["chain"]
478
484
  if "groupProperties" not in ret["Value"]:
479
- print(ret["Value"])
480
- print(proxyChain.dumpAllToString())
481
485
  self.log.error("Invalid Proxy", "Group has no properties defined")
482
486
  return S_ERROR("Proxy has no group properties defined")
483
487
 
@@ -539,7 +543,7 @@ class JobAgent(AgentModule):
539
543
  jobRequest = MatcherClient().requestJob(ceDict)
540
544
  matchTime = time.time() - start
541
545
 
542
- self.log.info("MatcherTime", f"= {matchTime:.2f} (s)")
546
+ self.log.verbose("MatcherTime", f"= {matchTime:.2f} (s)")
543
547
  if jobRequest["OK"]:
544
548
  jobRequest["Value"]["matchTime"] = matchTime
545
549
  jobRequest["Value"]["CEDict"] = ceDict
@@ -626,13 +630,15 @@ class JobAgent(AgentModule):
626
630
 
627
631
  self.log.info("Submitting JobWrapper", f"{os.path.basename(wrapperFile)} to {self.ceName}CE")
628
632
 
629
- # Pass proxy to the CE
630
- proxy = proxyChain.dumpAllToString()
631
- if not proxy["OK"]:
632
- self.log.error("Invalid proxy", proxy)
633
- return S_ERROR("Payload Proxy Not Found")
633
+ # Pass proxy to the CE, writing it to a temporary file to ensure the DiracX token is included
634
+ retVal = writeChainToTemporaryFile(proxyChain)
635
+ if not retVal["OK"]:
636
+ self.log.error("Invalid proxy", retVal["Message"])
637
+ return S_ERROR("Failed to write proxy to temporary file")
638
+ proxyLocation = Path(retVal["Value"])
639
+ payloadProxy = proxyLocation.read_text()
640
+ proxyLocation.unlink()
634
641
 
635
- payloadProxy = proxy["Value"]
636
642
  try:
637
643
  result = self.computingElement.submitJob(
638
644
  wrapperFile,
@@ -651,7 +657,7 @@ class JobAgent(AgentModule):
651
657
  self.log.exception("Exception occurred when submitting", f"JobID: {jobID}")
652
658
  taskID = 0
653
659
  # We create a S_ERROR from the exception to compute it as a normal error
654
- self.computingElement.taskResults[taskID] = S_ERROR(unexpectedSubmitException)
660
+ self.computingElement.taskResults[taskID] = S_ERROR(str(unexpectedSubmitException))
655
661
  self.jobs[jobID]["TaskID"] = taskID
656
662
  return S_OK()
657
663
 
@@ -683,7 +689,13 @@ class JobAgent(AgentModule):
683
689
  # Here we iterate over a copy of the keys because we are modifying the dictionary within the loop
684
690
  for jobID in list(self.jobs.keys()):
685
691
  taskID = self.jobs[jobID].get("TaskID")
686
- if taskID is None or taskID not in self.computingElement.taskResults:
692
+ if taskID is None:
693
+ # This generally means that there was an error before the submission
694
+ # and the TaskID was not set and will never be.
695
+ self.log.info("No taskID found for job", jobID)
696
+ del self.jobs[jobID]
697
+ continue
698
+ if taskID not in self.computingElement.taskResults:
687
699
  continue
688
700
 
689
701
  result = self.computingElement.taskResults[taskID]
@@ -35,10 +35,12 @@ from DIRAC.RequestManagementSystem.Client.Operation import Operation
35
35
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
36
36
  from DIRAC.RequestManagementSystem.Client.Request import Request
37
37
  from DIRAC.WorkloadManagementSystem.Client import JobStatus
38
- from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient
39
- from DIRAC.WorkloadManagementSystem.Client.SandboxStoreClient import SandboxStoreClient
40
38
  from DIRAC.WorkloadManagementSystem.Client.WMSClient import WMSClient
41
39
  from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
40
+ from DIRAC.WorkloadManagementSystem.DB.SandboxMetadataDB import SandboxMetadataDB
41
+ from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_DELETE
42
+ from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
43
+ from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters
42
44
 
43
45
 
44
46
  class JobCleaningAgent(AgentModule):
@@ -152,8 +154,9 @@ class JobCleaningAgent(AgentModule):
152
154
  return S_OK()
153
155
 
154
156
  self.log.info("Unassigning sandboxes from soon to be deleted jobs", f"({len(jobList)})")
155
- result = SandboxStoreClient(useCertificates=True).unassignJobs(jobList)
156
- if not result["OK"]:
157
+
158
+ entitiesList = [f"Job:{jobId}" for jobId in jobList]
159
+ if not (result := SandboxMetadataDB().unassignEntities(entitiesList))["OK"]:
157
160
  self.log.error("Cannot unassign jobs to sandboxes", result["Message"])
158
161
  return result
159
162
 
@@ -229,14 +232,14 @@ class JobCleaningAgent(AgentModule):
229
232
  if not res["OK"]:
230
233
  self.log.error("No DN found", f"for {user}")
231
234
  return res
232
- wmsClient = WMSClient(useCertificates=True, delegatedDN=res["Value"][0], delegatedGroup=ownerGroup)
233
235
  if remove:
236
+ wmsClient = WMSClient(useCertificates=True, delegatedDN=res["Value"][0], delegatedGroup=ownerGroup)
234
237
  result = wmsClient.removeJob(jobsList)
235
238
  else:
236
- result = wmsClient.deleteJob(jobsList)
239
+ result = kill_delete_jobs(RIGHT_DELETE, jobsList)
237
240
  if not result["OK"]:
238
241
  self.log.error(
239
- "Could not {'remove' if remove else 'delete'} jobs",
242
+ f"Could not {'remove' if remove else 'delete'} jobs",
240
243
  f"for {user} : {ownerGroup} (n={len(jobsList)}) : {result['Message']}",
241
244
  )
242
245
  fail = True
@@ -293,7 +296,8 @@ class JobCleaningAgent(AgentModule):
293
296
  failed = {}
294
297
  successful = {}
295
298
 
296
- result = JobMonitoringClient().getJobParameters(jobIDList, ["OutputSandboxLFN"])
299
+ jobIDs = [int(jobID) for jobID in jobIDList]
300
+ result = getJobParameters(jobIDs, "OutputSandboxLFN")
297
301
  if not result["OK"]:
298
302
  return result
299
303
  osLFNDict = result["Value"]
@@ -1,4 +1,4 @@
1
- """ This agent syncs CS and pilot files to a web server of your choice
1
+ """This agent syncs CS and pilot files to a web server of your choice
2
2
 
3
3
  .. literalinclude:: ../ConfigTemplate.cfg
4
4
  :start-after: ##BEGIN PilotSyncAgent
@@ -7,6 +7,7 @@
7
7
  :caption: PilotsSyncAgent options
8
8
 
9
9
  """
10
+
10
11
  import os
11
12
  import json
12
13
  import shutil
@@ -38,8 +39,8 @@ class PilotSyncAgent(AgentModule):
38
39
  self.workingDirectory = self.am_getOption("WorkDirectory")
39
40
  self.saveDir = self.am_getOption("SaveDirectory", self.saveDir)
40
41
  self.uploadLocations = self.am_getOption("UploadLocations", self.uploadLocations)
41
- includeMasterCS = self.am_getOption("IncludeMasterCS", self.includeMasterCS)
42
- if isinstance(includeMasterCS, str) and includeMasterCS.lower() in ["n", "no", "false"]:
42
+ self.includeMasterCS = self.am_getOption("IncludeMasterCS", self.includeMasterCS)
43
+ if isinstance(self.includeMasterCS, str) and self.includeMasterCS.lower() in ["n", "no", "false"]:
43
44
  self.includeMasterCS = False
44
45
 
45
46
  self.certAndKeyLocation = getHostCertificateAndKeyLocation()
@@ -12,16 +12,14 @@
12
12
  import hashlib
13
13
  import json
14
14
  import os
15
- from pathlib import Path
16
15
  import random
17
16
  import shutil
18
17
  import sys
19
- from collections import defaultdict
20
18
  import time
19
+ from collections import defaultdict
20
+ from pathlib import Path
21
21
 
22
- from diraccfg import CFG
23
-
24
- from DIRAC import gConfig, S_OK, S_ERROR
22
+ from DIRAC import S_ERROR, S_OK, gConfig
25
23
  from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
26
24
  from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getQueues
27
25
  from DIRAC.Core.Utilities import DErrno
@@ -30,6 +28,7 @@ from DIRAC.Core.Utilities.Proxy import executeWithUserProxy
30
28
  from DIRAC.Core.Utilities.Version import getVersion
31
29
  from DIRAC.FrameworkSystem.Client.ProxyManagerClient import gProxyManager
32
30
  from DIRAC.Resources.Computing import ComputingElement
31
+ from DIRAC.WorkloadManagementSystem.Agent.JobAgent import JobAgent
33
32
  from DIRAC.WorkloadManagementSystem.Client import JobMinorStatus, JobStatus, PilotStatus
34
33
  from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient
35
34
  from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport
@@ -41,11 +40,10 @@ from DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapperUtilities import (
41
40
  resolveInputData,
42
41
  transferInputSandbox,
43
42
  )
44
- from DIRAC.WorkloadManagementSystem.Utilities.QueueUtilities import getQueuesResolved
45
- from DIRAC.WorkloadManagementSystem.Agent.JobAgent import JobAgent
46
- from DIRAC.WorkloadManagementSystem.Utilities.Utils import createJobWrapper
47
43
  from DIRAC.WorkloadManagementSystem.private.ConfigHelper import findGenericPilotCredentials
44
+ from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters
48
45
  from DIRAC.WorkloadManagementSystem.Utilities.QueueUtilities import getQueuesResolved
46
+ from DIRAC.WorkloadManagementSystem.Utilities.Utils import createJobWrapper
49
47
 
50
48
  MAX_JOBS_MANAGED = 100
51
49
 
@@ -230,6 +228,12 @@ class PushJobAgent(JobAgent):
230
228
  return result
231
229
  pilotProxy = result["Value"]
232
230
 
231
+ # Dump the proxy to a file to get DiracX token (it's later used by DiracX)
232
+ result = gProxyManager.dumpProxyToFile(pilotProxy)
233
+ if not result["OK"]:
234
+ return result
235
+ os.environ["X509_USER_PROXY"] = result["Value"]
236
+
233
237
  for queueName, queueDictionary in queueDictItems:
234
238
  # Make sure there is no problem with the queue before trying to submit
235
239
  if not self._allowedToSubmit(queueName):
@@ -281,7 +285,6 @@ class PushJobAgent(JobAgent):
281
285
  jobGroup = matcherInfo["Group"]
282
286
  owner = matcherInfo["Owner"]
283
287
  ceDict = matcherInfo["CEDict"]
284
- matchTime = matcherInfo["matchTime"]
285
288
 
286
289
  optimizerParams = {}
287
290
  for key in matcherInfo:
@@ -309,9 +312,6 @@ class PushJobAgent(JobAgent):
309
312
  self.log.verbose("Job request successful: \n", jobRequest["Value"])
310
313
  self.log.info("Received", f"JobID={jobID}, JobType={jobType}, Owner={owner}, JobGroup={jobGroup}")
311
314
 
312
- self.jobs[jobID]["JobReport"].setJobParameter(
313
- par_name="MatcherServiceTime", par_value=str(matchTime), sendFlag=False
314
- )
315
315
  self.jobs[jobID]["JobReport"].setJobStatus(
316
316
  status=JobStatus.MATCHED, minorStatus="Job Received by Agent", sendFlag=False
317
317
  )
@@ -734,7 +734,7 @@ class PushJobAgent(JobAgent):
734
734
  return S_OK()
735
735
 
736
736
  # Get their parameters
737
- if not (result := self.jobMonitoring.getJobParameters(jobs, ["GridCE", "TaskID", "Stamp"]))["OK"]:
737
+ if not (result := getJobParameters(jobs, ["GridCE", "TaskID", "Stamp"]))["OK"]:
738
738
  self.log.error("Failed to get the list of taskIDs", result["Message"])
739
739
  return result
740
740
 
@@ -1,4 +1,4 @@
1
- """ The Site Director is an agent performing pilot job submission to particular sites/Computing Elements.
1
+ """The Site Director is an agent performing pilot job submission to particular sites/Computing Elements.
2
2
 
3
3
  .. literalinclude:: ../ConfigTemplate.cfg
4
4
  :start-after: ##BEGIN SiteDirector
@@ -7,6 +7,7 @@
7
7
  :caption: SiteDirector options
8
8
 
9
9
  """
10
+
10
11
  import datetime
11
12
  import os
12
13
  from collections import defaultdict
@@ -14,7 +15,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
14
15
  from typing import Any
15
16
 
16
17
  import DIRAC
17
- from DIRAC import S_ERROR, S_OK, gConfig
18
+ from DIRAC import S_ERROR, S_OK
18
19
  from DIRAC.AccountingSystem.Client.DataStoreClient import gDataStoreClient
19
20
  from DIRAC.AccountingSystem.Client.Types.Pilot import Pilot as PilotAccounting
20
21
  from DIRAC.AccountingSystem.Client.Types.PilotSubmission import (
@@ -147,10 +148,10 @@ class SiteDirector(AgentModule):
147
148
  self.sendSubmissionAccounting = True
148
149
 
149
150
  # Get the site description dictionary
150
- siteNames = self.am_getOption("Site", [])
151
- ceTypes = self.am_getOption("CETypes", [])
152
- ces = self.am_getOption("CEs", [])
153
- tags = self.am_getOption("Tags", [])
151
+ siteNames = self.am_getOption("Site")
152
+ ceTypes = self.am_getOption("CETypes")
153
+ ces = self.am_getOption("CEs")
154
+ tags = self.am_getOption("Tags")
154
155
 
155
156
  # Display options used
156
157
  self.log.always("VO:", self.vo)
@@ -168,7 +169,7 @@ class SiteDirector(AgentModule):
168
169
  self.log.always("MaxPilotsToSubmit:", self.maxPilotsToSubmit)
169
170
 
170
171
  # Build the dictionary of queues that are going to be used: self.queueDict
171
- if not (result := self._buildQueueDict(siteNames, ceTypes, ces, tags))["OK"]:
172
+ if not (result := self._buildQueueDict(siteNames, ces, ceTypes, tags))["OK"]:
172
173
  return result
173
174
 
174
175
  # Stop the execution if there is no usable queue
@@ -229,12 +230,8 @@ class SiteDirector(AgentModule):
229
230
  site = self.queueDict[queueName]["Site"]
230
231
  ce = self.queueDict[queueName]["CEName"]
231
232
 
232
- # Check the status of the Site
233
- if site in siteMaskList:
234
- continue
235
-
236
- # Check the status of the CE (only for RSS=Active)
237
- if ce not in ceMaskList:
233
+ # Check the status of the Site and CE
234
+ if site in siteMaskList and ce in ceMaskList:
238
235
  continue
239
236
 
240
237
  self.log.warn("Queue not considered because not usable:", queueName)
@@ -580,7 +577,7 @@ class SiteDirector(AgentModule):
580
577
  pilotOptions = []
581
578
 
582
579
  pilotOptions = " ".join(pilotOptions)
583
- self.log.verbose(f"pilotOptions: {pilotOptions}")
580
+ self.log.verbose(f"{pilotOptions=}")
584
581
 
585
582
  # if a global workingDirectory is defined for the CEType (like HTCondor)
586
583
  # use it (otherwise the __cleanup done by HTCondor will be in the wrong folder !)
@@ -624,6 +621,11 @@ class SiteDirector(AgentModule):
624
621
  else:
625
622
  self.log.info("DIRAC project will be installed by pilots")
626
623
 
624
+ # Architecture script to use
625
+ architectureScript = opsHelper.getValue("Pilot/ArchitectureScript", "")
626
+ if architectureScript:
627
+ pilotOptions.append(f"--architectureScript={architectureScript}")
628
+
627
629
  # Preinstalled environment or list of CVMFS locations defined ?
628
630
  preinstalledEnv = opsHelper.getValue("Pilot/PreinstalledEnv", "")
629
631
  preinstalledEnvPrefix = opsHelper.getValue("Pilot/PreinstalledEnvPrefix", "")
@@ -695,6 +697,8 @@ class SiteDirector(AgentModule):
695
697
  :returns: file name of the pilot wrapper created
696
698
  """
697
699
 
700
+ pilotFilesCompressedEncodedDict = None
701
+
698
702
  try:
699
703
  pilotFilesCompressedEncodedDict = getPilotFilesCompressedEncodedDict([], proxy)
700
704
  except Exception as be:
@@ -14,18 +14,18 @@ import datetime
14
14
  from DIRAC import S_ERROR, S_OK, gConfig
15
15
  from DIRAC.AccountingSystem.Client.Types.Job import Job
16
16
  from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath
17
- from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getDNForUsername
18
17
  from DIRAC.Core.Base.AgentModule import AgentModule
19
18
  from DIRAC.Core.Utilities import DErrno
20
19
  from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
21
20
  from DIRAC.Core.Utilities.TimeUtilities import fromString, second, toEpoch
22
21
  from DIRAC.WorkloadManagementSystem.Client import JobMinorStatus, JobStatus
23
- from DIRAC.WorkloadManagementSystem.Client.JobManagerClient import JobManagerClient
24
- from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient
25
- from DIRAC.WorkloadManagementSystem.Client.PilotManagerClient import PilotManagerClient
26
- from DIRAC.WorkloadManagementSystem.Client.WMSClient import WMSClient
27
22
  from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
28
23
  from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
24
+ from DIRAC.WorkloadManagementSystem.DB.PilotAgentsDB import PilotAgentsDB
25
+ from DIRAC.WorkloadManagementSystem.Service.JobPolicy import RIGHT_KILL
26
+ from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
27
+ from DIRAC.WorkloadManagementSystem.Utilities.JobParameters import getJobParameters
28
+ from DIRAC.WorkloadManagementSystem.Utilities.Utils import rescheduleJobs
29
29
 
30
30
 
31
31
  class StalledJobAgent(AgentModule):
@@ -235,7 +235,7 @@ class StalledJobAgent(AgentModule):
235
235
  # Set the jobs Failed, send them a kill signal in case they are not really dead
236
236
  # and send accounting info
237
237
  if setFailed:
238
- res = self._sendKillCommand(jobID)
238
+ res = kill_delete_jobs(RIGHT_KILL, [jobID], nonauthJobList=[], force=True)
239
239
  if not res["OK"]:
240
240
  self.log.error("Failed to kill job", jobID)
241
241
 
@@ -254,15 +254,15 @@ class StalledJobAgent(AgentModule):
254
254
 
255
255
  def _getJobPilotStatus(self, jobID):
256
256
  """Get the job pilot status."""
257
- result = JobMonitoringClient().getJobParameter(jobID, "Pilot_Reference")
257
+ result = getJobParameters([jobID], "Pilot_Reference")
258
258
  if not result["OK"]:
259
259
  return result
260
- pilotReference = result["Value"].get("Pilot_Reference", "Unknown")
261
- if pilotReference == "Unknown":
260
+ pilotReference = result["Value"].get("Pilot_Reference")
261
+ if not pilotReference:
262
262
  # There is no pilot reference, hence its status is unknown
263
263
  return S_OK("NoPilot")
264
264
 
265
- result = PilotManagerClient().getPilotInfo(pilotReference)
265
+ result = PilotAgentsDB().getPilotInfo(pilotReference)
266
266
  if not result["OK"]:
267
267
  if DErrno.cmpError(result, DErrno.EWMSNOPILOT):
268
268
  self.log.warn("No pilot found", f"for job {jobID}: {result['Message']}")
@@ -389,11 +389,11 @@ class StalledJobAgent(AgentModule):
389
389
  if lastHeartBeatTime is not None and lastHeartBeatTime > endTime:
390
390
  endTime = lastHeartBeatTime
391
391
 
392
- result = JobMonitoringClient().getJobParameter(jobID, "CPUNormalizationFactor")
393
- if not result["OK"] or not result["Value"]:
392
+ result = getJobParameters([jobID], "CPUNormalizationFactor")
393
+ if not result["OK"] or not result["Value"] or not result["Value"].get("CPUNormalizationFactor"):
394
394
  self.log.error(
395
395
  "Error getting Job Parameter CPUNormalizationFactor, setting 0",
396
- result.get("Message", "No such value"),
396
+ result.get("Message"),
397
397
  )
398
398
  cpuNormalization = 0.0
399
399
  else:
@@ -518,8 +518,7 @@ class StalledJobAgent(AgentModule):
518
518
  return startTime, endTime
519
519
 
520
520
  def _kickStuckJobs(self):
521
- """Reschedule jobs stuck in initialization status Rescheduled,
522
- Matched."""
521
+ """Reschedule jobs stuck in initialization status Rescheduled, Matched."""
523
522
 
524
523
  message = ""
525
524
 
@@ -530,17 +529,12 @@ class StalledJobAgent(AgentModule):
530
529
  return result
531
530
 
532
531
  jobIDs = result["Value"]
533
- jobManagerClient = JobManagerClient()
534
532
  if jobIDs:
535
533
  self.log.info(f"Rescheduling {len(jobIDs)} jobs stuck in {JobStatus.MATCHED} status")
536
- result = jobManagerClient.rescheduleJob(jobIDs)
534
+ result = rescheduleJobs(jobIDs)
537
535
  if not result["OK"]:
538
536
  message = f"Failed to reschedule jobs stuck in {JobStatus.MATCHED} status"
539
537
  message += "\n" + result["Message"]
540
- if "InvalidJobIDs" in result:
541
- message += "\n" + "\tInvalid job IDs: " + str(result["InvalidJobIDs"])
542
- if "NonauthorizedJobIDs" in result:
543
- message += "\n" + "\tNon authorized job IDs: " + str(result["NonauthorizedJobIDs"])
544
538
 
545
539
  checkTime = datetime.datetime.utcnow() - self.rescheduledTime * second
546
540
  result = self.jobDB.selectJobs({"Status": JobStatus.RESCHEDULED}, older=checkTime)
@@ -550,18 +544,14 @@ class StalledJobAgent(AgentModule):
550
544
 
551
545
  jobIDs = result["Value"]
552
546
  if jobIDs:
553
- self.log.info(f"Rescheduling {len(jobIDs)} jobs stuck in Rescheduled status")
554
- result = jobManagerClient.rescheduleJob(jobIDs)
547
+ self.log.info(f"Rescheduling {len(jobIDs)} jobs stuck in {JobStatus.RESCHEDULED} status")
548
+ result = rescheduleJobs(jobIDs)
555
549
  if not result["OK"]:
556
550
  message = f"Failed to reschedule jobs stuck in {JobStatus.RESCHEDULED} status"
557
551
  message += "\n" + result["Message"]
558
- if "InvalidJobIDs" in result:
559
- message += "\n" + "\tInvalid job IDs: " + str(result["InvalidJobIDs"])
560
- if "NonauthorizedJobIDs" in result:
561
- message += "\n" + "\tNon authorized job IDs: " + str(result["NonauthorizedJobIDs"])
562
552
 
563
553
  if message:
564
- return S_ERROR(message)
554
+ self.log.error(message)
565
555
  return S_OK()
566
556
 
567
557
  def _failSubmittingJobs(self):
@@ -584,26 +574,3 @@ class StalledJobAgent(AgentModule):
584
574
  continue
585
575
 
586
576
  return S_OK()
587
-
588
- def _sendKillCommand(self, job):
589
- """Send a kill signal to the job such that it cannot continue running.
590
-
591
- :param int job: ID of job to send kill command
592
- """
593
-
594
- res = self.jobDB.getJobAttribute(job, "Owner")
595
- if not res["OK"]:
596
- return res
597
- owner = res["Value"]
598
-
599
- res = self.jobDB.getJobAttribute(job, "OwnerGroup")
600
- if not res["OK"]:
601
- return res
602
- ownerGroup = res["Value"]
603
-
604
- wmsClient = WMSClient(
605
- useCertificates=True,
606
- delegatedDN=getDNForUsername(owner)["Value"][0] if owner else None,
607
- delegatedGroup=ownerGroup,
608
- )
609
- return wmsClient.killJob(job)
@@ -9,10 +9,11 @@
9
9
  """
10
10
  import datetime
11
11
 
12
- from DIRAC import S_ERROR, S_OK
12
+ from DIRAC import S_ERROR, S_OK, gConfig
13
13
  from DIRAC.AccountingSystem.Client.DataStoreClient import DataStoreClient
14
14
  from DIRAC.AccountingSystem.Client.Types.WMSHistory import WMSHistory
15
15
  from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
16
+ from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getSites
16
17
  from DIRAC.Core.Base.AgentModule import AgentModule
17
18
  from DIRAC.Core.Utilities import TimeUtilities
18
19
  from DIRAC.MonitoringSystem.Client.MonitoringReporter import MonitoringReporter
@@ -77,6 +78,8 @@ class StatesAccountingAgent(AgentModule):
77
78
  def execute(self):
78
79
  """Main execution method"""
79
80
 
81
+ site_metadata = self._getSitesMetadata()
82
+
80
83
  # on the first iteration of the agent, do nothing in order to avoid double committing after a restart
81
84
  if self.am_getModuleParam("cyclesDone") == 0:
82
85
  self.log.notice("Skipping the first iteration of the agent")
@@ -131,6 +134,16 @@ class StatesAccountingAgent(AgentModule):
131
134
 
132
135
  for backend in self.datastores:
133
136
  if backend.lower() == "monitoring":
137
+ site_name = rD["Site"]
138
+ if site_name not in site_metadata:
139
+ self.log.warn(
140
+ f"Site {site_name} not found in site metadata, using default values",
141
+ )
142
+ rD["Tier"] = "4"
143
+ rD["Type"] = site_name.split(".")[0]
144
+ else:
145
+ rD["Tier"] = site_metadata[site_name]["Tier"]
146
+ rD["Type"] = site_metadata[site_name]["Type"]
134
147
  rD["timestamp"] = int(TimeUtilities.toEpochMilliSeconds(now))
135
148
  self.datastores["Monitoring"].addRecord(rD)
136
149
 
@@ -154,3 +167,30 @@ class StatesAccountingAgent(AgentModule):
154
167
  self.log.verbose(f"Done committing WMSHistory to {backend} backend")
155
168
 
156
169
  return S_OK()
170
+
171
+ def _getSitesMetadata(self):
172
+ """Get the metadata for the sites"""
173
+ res = getSites()
174
+ if not res["OK"]:
175
+ return res
176
+ sites = res["Value"]
177
+ site_metadata = {}
178
+
179
+ for site in sites:
180
+ site_metadata[site] = {}
181
+
182
+ # Get the site metadata from the Configuration System
183
+ grid = site.split(".")[0]
184
+ res = gConfig.getOptionsDict(f"Resources/Sites/{grid}/{site}")
185
+ if not res["OK"]:
186
+ self.log.error("Failure getting options dict for site", f"{site}: {res['Message']}")
187
+ continue
188
+ siteInfoCS = res["Value"]
189
+
190
+ # The site tier is normally 1 or 2. Few VOs may define tier 3.
191
+ # If the tier is not defined, we assume it is 4, with 4 meaning "not pledged" (opportunistic).
192
+ site_metadata[site]["Tier"] = siteInfoCS.get("MoUTierLevel", "4")
193
+ # The site type is defined by the first part of the site name.
194
+ # It needs to be interpreted at the Monitoring side (e.g. in Grafana).
195
+ site_metadata[site]["Type"] = site.split(".")[0]
196
+ return site_metadata