DIRAC 9.0.0a42__py3-none-any.whl → 9.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. DIRAC/AccountingSystem/Client/AccountingCLI.py +0 -140
  2. DIRAC/AccountingSystem/Client/DataStoreClient.py +0 -13
  3. DIRAC/AccountingSystem/Client/Types/BaseAccountingType.py +0 -7
  4. DIRAC/AccountingSystem/ConfigTemplate.cfg +0 -5
  5. DIRAC/AccountingSystem/Service/DataStoreHandler.py +0 -72
  6. DIRAC/ConfigurationSystem/Client/Helpers/CSGlobals.py +0 -9
  7. DIRAC/ConfigurationSystem/Client/Helpers/Registry.py +38 -26
  8. DIRAC/ConfigurationSystem/Client/Helpers/Resources.py +11 -43
  9. DIRAC/ConfigurationSystem/Client/Helpers/test/Test_Helpers.py +0 -16
  10. DIRAC/ConfigurationSystem/Client/LocalConfiguration.py +14 -8
  11. DIRAC/ConfigurationSystem/Client/PathFinder.py +47 -8
  12. DIRAC/ConfigurationSystem/Client/SyncPlugins/CERNLDAPSyncPlugin.py +4 -1
  13. DIRAC/ConfigurationSystem/Client/VOMS2CSSynchronizer.py +32 -19
  14. DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py +41 -1
  15. DIRAC/ConfigurationSystem/private/RefresherBase.py +4 -2
  16. DIRAC/Core/Base/API.py +4 -7
  17. DIRAC/Core/Base/SQLAlchemyDB.py +1 -0
  18. DIRAC/Core/DISET/ServiceReactor.py +11 -3
  19. DIRAC/Core/DISET/private/BaseClient.py +1 -2
  20. DIRAC/Core/DISET/private/Transports/M2SSLTransport.py +9 -7
  21. DIRAC/Core/DISET/private/Transports/SSL/M2Utils.py +3 -1
  22. DIRAC/Core/LCG/GOCDBClient.py +5 -7
  23. DIRAC/Core/Security/DiracX.py +31 -17
  24. DIRAC/Core/Security/IAMService.py +5 -10
  25. DIRAC/Core/Security/Locations.py +27 -18
  26. DIRAC/Core/Security/ProxyInfo.py +9 -5
  27. DIRAC/Core/Security/VOMSService.py +2 -4
  28. DIRAC/Core/Security/m2crypto/X509Certificate.py +4 -6
  29. DIRAC/Core/Security/m2crypto/asn1_utils.py +17 -5
  30. DIRAC/Core/Security/test/test_diracx_token_from_pem.py +161 -0
  31. DIRAC/Core/Tornado/Client/ClientSelector.py +4 -1
  32. DIRAC/Core/Tornado/Server/TornadoService.py +1 -1
  33. DIRAC/Core/Utilities/CGroups2.py +328 -0
  34. DIRAC/Core/Utilities/ClassAd/ClassAdLight.py +4 -290
  35. DIRAC/Core/Utilities/DErrno.py +5 -309
  36. DIRAC/Core/Utilities/Extensions.py +10 -1
  37. DIRAC/Core/Utilities/File.py +1 -1
  38. DIRAC/Core/Utilities/Graphs/GraphData.py +1 -1
  39. DIRAC/Core/Utilities/Graphs/GraphUtilities.py +6 -1
  40. DIRAC/Core/Utilities/JDL.py +1 -195
  41. DIRAC/Core/Utilities/List.py +1 -124
  42. DIRAC/Core/Utilities/MySQL.py +103 -99
  43. DIRAC/Core/Utilities/Os.py +32 -1
  44. DIRAC/Core/Utilities/Platform.py +2 -107
  45. DIRAC/Core/Utilities/Proxy.py +0 -4
  46. DIRAC/Core/Utilities/ReturnValues.py +7 -252
  47. DIRAC/Core/Utilities/StateMachine.py +12 -178
  48. DIRAC/Core/Utilities/Subprocess.py +35 -14
  49. DIRAC/Core/Utilities/TimeUtilities.py +10 -253
  50. DIRAC/Core/Utilities/test/Test_JDL.py +0 -3
  51. DIRAC/Core/Utilities/test/Test_Profiler.py +20 -20
  52. DIRAC/Core/scripts/dirac_agent.py +1 -1
  53. DIRAC/Core/scripts/dirac_apptainer_exec.py +72 -46
  54. DIRAC/Core/scripts/dirac_configure.py +1 -3
  55. DIRAC/Core/scripts/dirac_install_db.py +24 -6
  56. DIRAC/Core/scripts/dirac_platform.py +1 -92
  57. DIRAC/DataManagementSystem/Agent/FTS3Agent.py +8 -7
  58. DIRAC/DataManagementSystem/Agent/RequestOperations/RemoveFile.py +7 -6
  59. DIRAC/DataManagementSystem/Client/FTS3Job.py +71 -34
  60. DIRAC/DataManagementSystem/DB/FTS3DB.py +7 -3
  61. DIRAC/DataManagementSystem/DB/FileCatalogComponents/DatasetManager/DatasetManager.py +1 -1
  62. DIRAC/DataManagementSystem/DB/FileCatalogDB.sql +9 -9
  63. DIRAC/DataManagementSystem/DB/FileCatalogWithFkAndPsDB.sql +9 -9
  64. DIRAC/DataManagementSystem/Utilities/DMSHelpers.py +6 -2
  65. DIRAC/DataManagementSystem/scripts/dirac_admin_allow_se.py +13 -8
  66. DIRAC/DataManagementSystem/scripts/dirac_admin_ban_se.py +13 -8
  67. DIRAC/DataManagementSystem/scripts/dirac_dms_create_moving_request.py +2 -0
  68. DIRAC/DataManagementSystem/scripts/dirac_dms_protocol_matrix.py +0 -1
  69. DIRAC/FrameworkSystem/Client/BundleDeliveryClient.py +2 -7
  70. DIRAC/FrameworkSystem/Client/ComponentInstaller.py +9 -4
  71. DIRAC/FrameworkSystem/Client/ProxyManagerClient.py +5 -2
  72. DIRAC/FrameworkSystem/Client/SystemAdministratorClientCLI.py +11 -6
  73. DIRAC/FrameworkSystem/ConfigTemplate.cfg +2 -0
  74. DIRAC/FrameworkSystem/DB/AuthDB.py +3 -3
  75. DIRAC/FrameworkSystem/DB/InstalledComponentsDB.py +4 -4
  76. DIRAC/FrameworkSystem/DB/ProxyDB.py +11 -3
  77. DIRAC/FrameworkSystem/DB/TokenDB.py +1 -1
  78. DIRAC/FrameworkSystem/Service/ProxyManagerHandler.py +8 -6
  79. DIRAC/FrameworkSystem/Utilities/MonitoringUtilities.py +2 -19
  80. DIRAC/FrameworkSystem/Utilities/TokenManagementUtilities.py +3 -2
  81. DIRAC/FrameworkSystem/Utilities/diracx.py +36 -14
  82. DIRAC/FrameworkSystem/private/authorization/AuthServer.py +2 -2
  83. DIRAC/FrameworkSystem/scripts/dirac_admin_update_pilot.py +18 -11
  84. DIRAC/FrameworkSystem/scripts/dirac_login.py +2 -2
  85. DIRAC/FrameworkSystem/scripts/dirac_proxy_init.py +7 -8
  86. DIRAC/Interfaces/API/Dirac.py +27 -15
  87. DIRAC/Interfaces/API/DiracAdmin.py +45 -17
  88. DIRAC/Interfaces/API/Job.py +9 -13
  89. DIRAC/Interfaces/scripts/dirac_admin_allow_site.py +12 -18
  90. DIRAC/Interfaces/scripts/dirac_admin_ban_site.py +12 -10
  91. DIRAC/Interfaces/scripts/dirac_admin_get_site_mask.py +4 -13
  92. DIRAC/Interfaces/scripts/dirac_admin_reset_job.py +3 -6
  93. DIRAC/Interfaces/scripts/dirac_wms_job_parameters.py +0 -1
  94. DIRAC/MonitoringSystem/Client/Types/WMSHistory.py +4 -0
  95. DIRAC/MonitoringSystem/Client/WebAppClient.py +26 -0
  96. DIRAC/MonitoringSystem/ConfigTemplate.cfg +9 -0
  97. DIRAC/MonitoringSystem/DB/MonitoringDB.py +6 -25
  98. DIRAC/MonitoringSystem/Service/MonitoringHandler.py +0 -33
  99. DIRAC/MonitoringSystem/Service/WebAppHandler.py +599 -0
  100. DIRAC/MonitoringSystem/private/MainReporter.py +0 -3
  101. DIRAC/ProductionSystem/DB/ProductionDB.sql +4 -4
  102. DIRAC/ProductionSystem/scripts/dirac_prod_get.py +2 -2
  103. DIRAC/ProductionSystem/scripts/dirac_prod_get_all.py +2 -2
  104. DIRAC/ProductionSystem/scripts/dirac_prod_get_trans.py +2 -3
  105. DIRAC/RequestManagementSystem/Agent/RequestExecutingAgent.py +8 -6
  106. DIRAC/RequestManagementSystem/Agent/RequestOperations/ForwardDISET.py +2 -14
  107. DIRAC/RequestManagementSystem/Client/ReqClient.py +66 -13
  108. DIRAC/RequestManagementSystem/ConfigTemplate.cfg +6 -6
  109. DIRAC/RequestManagementSystem/DB/RequestDB.py +10 -5
  110. DIRAC/RequestManagementSystem/DB/test/RMSTestScenari.py +2 -0
  111. DIRAC/RequestManagementSystem/private/RequestValidator.py +40 -46
  112. DIRAC/ResourceStatusSystem/Client/SiteStatus.py +4 -2
  113. DIRAC/ResourceStatusSystem/Command/FreeDiskSpaceCommand.py +3 -1
  114. DIRAC/ResourceStatusSystem/DB/ResourceManagementDB.py +8 -8
  115. DIRAC/ResourceStatusSystem/DB/ResourceStatusDB.py +2 -2
  116. DIRAC/ResourceStatusSystem/Utilities/CSHelpers.py +2 -31
  117. DIRAC/ResourceStatusSystem/scripts/dirac_rss_set_status.py +30 -12
  118. DIRAC/Resources/Catalog/RucioFileCatalogClient.py +195 -1
  119. DIRAC/Resources/Catalog/test/Test_RucioFileCatalogClient.py +181 -0
  120. DIRAC/Resources/Computing/AREXComputingElement.py +25 -8
  121. DIRAC/Resources/Computing/BatchSystems/Condor.py +126 -108
  122. DIRAC/Resources/Computing/BatchSystems/SLURM.py +5 -1
  123. DIRAC/Resources/Computing/BatchSystems/test/Test_SLURM.py +46 -0
  124. DIRAC/Resources/Computing/ComputingElement.py +1 -1
  125. DIRAC/Resources/Computing/HTCondorCEComputingElement.py +44 -44
  126. DIRAC/Resources/Computing/InProcessComputingElement.py +4 -2
  127. DIRAC/Resources/Computing/LocalComputingElement.py +1 -18
  128. DIRAC/Resources/Computing/SSHBatchComputingElement.py +1 -17
  129. DIRAC/Resources/Computing/SSHComputingElement.py +1 -18
  130. DIRAC/Resources/Computing/SingularityComputingElement.py +19 -5
  131. DIRAC/Resources/Computing/test/Test_HTCondorCEComputingElement.py +67 -49
  132. DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +2 -1
  133. DIRAC/Resources/IdProvider/CheckInIdProvider.py +13 -0
  134. DIRAC/Resources/IdProvider/IdProviderFactory.py +11 -3
  135. DIRAC/Resources/MessageQueue/StompMQConnector.py +1 -1
  136. DIRAC/Resources/Storage/GFAL2_StorageBase.py +24 -15
  137. DIRAC/Resources/Storage/OccupancyPlugins/WLCGAccountingHTTPJson.py +1 -3
  138. DIRAC/Resources/Storage/StorageBase.py +4 -2
  139. DIRAC/Resources/Storage/StorageElement.py +6 -7
  140. DIRAC/StorageManagementSystem/DB/StorageManagementDB.sql +2 -2
  141. DIRAC/TransformationSystem/Agent/TaskManagerAgentBase.py +10 -16
  142. DIRAC/TransformationSystem/Agent/TransformationAgent.py +22 -1
  143. DIRAC/TransformationSystem/Agent/TransformationCleaningAgent.py +16 -16
  144. DIRAC/TransformationSystem/Client/TaskManager.py +2 -4
  145. DIRAC/TransformationSystem/Client/Transformation.py +6 -7
  146. DIRAC/TransformationSystem/Client/TransformationClient.py +21 -11
  147. DIRAC/TransformationSystem/Client/Utilities.py +9 -0
  148. DIRAC/TransformationSystem/DB/TransformationDB.py +11 -14
  149. DIRAC/TransformationSystem/DB/TransformationDB.sql +9 -9
  150. DIRAC/TransformationSystem/Service/TransformationManagerHandler.py +0 -333
  151. DIRAC/TransformationSystem/Utilities/ReplicationCLIParameters.py +3 -3
  152. DIRAC/TransformationSystem/Utilities/TransformationInfo.py +7 -5
  153. DIRAC/TransformationSystem/scripts/dirac_production_runjoblocal.py +2 -4
  154. DIRAC/TransformationSystem/test/Test_TransformationInfo.py +22 -15
  155. DIRAC/TransformationSystem/test/Test_replicationTransformation.py +5 -6
  156. DIRAC/Workflow/Modules/test/Test_Modules.py +5 -0
  157. DIRAC/WorkloadManagementSystem/Agent/JobAgent.py +38 -26
  158. DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py +12 -8
  159. DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py +4 -3
  160. DIRAC/WorkloadManagementSystem/Agent/PushJobAgent.py +13 -13
  161. DIRAC/WorkloadManagementSystem/Agent/SiteDirector.py +18 -14
  162. DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py +18 -51
  163. DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py +41 -1
  164. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py +45 -4
  165. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobCleaningAgent.py +7 -9
  166. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py +1 -0
  167. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_SiteDirector.py +9 -2
  168. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py +4 -5
  169. DIRAC/WorkloadManagementSystem/Client/DownloadInputData.py +9 -9
  170. DIRAC/WorkloadManagementSystem/Client/InputDataResolution.py +6 -6
  171. DIRAC/WorkloadManagementSystem/Client/JobMonitoringClient.py +10 -11
  172. DIRAC/WorkloadManagementSystem/Client/JobReport.py +1 -1
  173. DIRAC/WorkloadManagementSystem/Client/JobState/CachedJobState.py +3 -0
  174. DIRAC/WorkloadManagementSystem/Client/JobState/JobManifest.py +32 -261
  175. DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py +6 -0
  176. DIRAC/WorkloadManagementSystem/Client/JobStateUpdateClient.py +3 -0
  177. DIRAC/WorkloadManagementSystem/Client/JobStatus.py +8 -152
  178. DIRAC/WorkloadManagementSystem/Client/PoolXMLSlice.py +12 -19
  179. DIRAC/WorkloadManagementSystem/Client/SandboxStoreClient.py +25 -38
  180. DIRAC/WorkloadManagementSystem/Client/WMSClient.py +2 -3
  181. DIRAC/WorkloadManagementSystem/Client/test/Test_Client_DownloadInputData.py +29 -0
  182. DIRAC/WorkloadManagementSystem/ConfigTemplate.cfg +4 -8
  183. DIRAC/WorkloadManagementSystem/DB/JobDB.py +89 -132
  184. DIRAC/WorkloadManagementSystem/DB/JobDB.sql +8 -8
  185. DIRAC/WorkloadManagementSystem/DB/JobDBUtils.py +18 -147
  186. DIRAC/WorkloadManagementSystem/DB/JobLoggingDB.py +19 -6
  187. DIRAC/WorkloadManagementSystem/DB/JobParametersDB.py +9 -9
  188. DIRAC/WorkloadManagementSystem/DB/PilotAgentsDB.py +16 -5
  189. DIRAC/WorkloadManagementSystem/DB/PilotAgentsDB.sql +3 -3
  190. DIRAC/WorkloadManagementSystem/DB/SandboxMetadataDB.py +44 -82
  191. DIRAC/WorkloadManagementSystem/DB/StatusUtils.py +125 -0
  192. DIRAC/WorkloadManagementSystem/DB/tests/Test_JobDB.py +1 -1
  193. DIRAC/WorkloadManagementSystem/DB/tests/Test_StatusUtils.py +28 -0
  194. DIRAC/WorkloadManagementSystem/Executor/JobSanity.py +5 -4
  195. DIRAC/WorkloadManagementSystem/Executor/JobScheduling.py +4 -0
  196. DIRAC/WorkloadManagementSystem/FutureClient/JobStateUpdateClient.py +75 -33
  197. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +22 -11
  198. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py +9 -10
  199. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +60 -10
  200. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapperTemplate.py +4 -0
  201. DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py +33 -154
  202. DIRAC/WorkloadManagementSystem/Service/JobMonitoringHandler.py +5 -323
  203. DIRAC/WorkloadManagementSystem/Service/JobStateUpdateHandler.py +0 -16
  204. DIRAC/WorkloadManagementSystem/Service/PilotManagerHandler.py +6 -103
  205. DIRAC/WorkloadManagementSystem/Service/SandboxStoreHandler.py +7 -53
  206. DIRAC/WorkloadManagementSystem/Service/WMSAdministratorHandler.py +16 -79
  207. DIRAC/WorkloadManagementSystem/Service/WMSUtilities.py +4 -18
  208. DIRAC/WorkloadManagementSystem/Utilities/JobModel.py +28 -209
  209. DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +65 -3
  210. DIRAC/WorkloadManagementSystem/Utilities/JobStatusUtility.py +2 -64
  211. DIRAC/WorkloadManagementSystem/Utilities/ParametricJob.py +7 -171
  212. DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py +73 -7
  213. DIRAC/WorkloadManagementSystem/Utilities/PilotWrapper.py +41 -11
  214. DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +16 -0
  215. DIRAC/WorkloadManagementSystem/Utilities/Utils.py +36 -1
  216. DIRAC/WorkloadManagementSystem/Utilities/jobAdministration.py +15 -0
  217. DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobModel.py +1 -15
  218. DIRAC/WorkloadManagementSystem/Utilities/test/Test_ParametricJob.py +45 -128
  219. DIRAC/WorkloadManagementSystem/Utilities/test/Test_PilotWrapper.py +16 -0
  220. DIRAC/WorkloadManagementSystem/scripts/dirac_jobexec.py +7 -2
  221. DIRAC/WorkloadManagementSystem/scripts/dirac_wms_pilot_job_info.py +1 -1
  222. DIRAC/__init__.py +62 -60
  223. DIRAC/tests/Utilities/testJobDefinitions.py +22 -28
  224. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/METADATA +8 -5
  225. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/RECORD +229 -228
  226. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/WHEEL +1 -1
  227. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/entry_points.txt +0 -3
  228. DIRAC/Core/Utilities/test/Test_List.py +0 -150
  229. DIRAC/Core/Utilities/test/Test_Time.py +0 -88
  230. DIRAC/Resources/Computing/PilotBundle.py +0 -70
  231. DIRAC/TransformationSystem/scripts/dirac_transformation_archive.py +0 -30
  232. DIRAC/TransformationSystem/scripts/dirac_transformation_clean.py +0 -30
  233. DIRAC/TransformationSystem/scripts/dirac_transformation_remove_output.py +0 -30
  234. DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobManager.py +0 -58
  235. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info/licenses}/LICENSE +0 -0
  236. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,7 @@ and a Watchdog Agent that can monitor its progress.
10
10
  :caption: JobWrapper options
11
11
 
12
12
  """
13
+
13
14
  import contextlib
14
15
  import datetime
15
16
  import glob
@@ -54,6 +55,8 @@ from DIRAC.WorkloadManagementSystem.Client.JobStateUpdateClient import JobStateU
54
55
  from DIRAC.WorkloadManagementSystem.Client.SandboxStoreClient import SandboxStoreClient
55
56
  from DIRAC.WorkloadManagementSystem.JobWrapper.Watchdog import Watchdog
56
57
 
58
+ CHILD_PID_POLL_INTERVALS = list(range(5, 40, 5))
59
+
57
60
 
58
61
  class JobWrapper:
59
62
  """The only user of the JobWrapper is the JobWrapperTemplate"""
@@ -119,14 +122,16 @@ class JobWrapper:
119
122
  self.pilotRef = gConfig.getValue("/LocalSite/PilotReference", "Unknown")
120
123
  self.cpuNormalizationFactor = gConfig.getValue("/LocalSite/CPUNormalizationFactor", 0.0)
121
124
  self.bufferLimit = gConfig.getValue(self.section + "/BufferLimit", 10485760)
122
- self.defaultOutputSE = getDestinationSEList(
123
- gConfig.getValue("/Resources/StorageElementGroups/SE-USER", []), self.siteName
124
- )
125
+ try:
126
+ self.defaultOutputSE = getDestinationSEList("SE-USER", self.siteName)
127
+ except RuntimeError:
128
+ self.defaultOutputSE = []
125
129
  self.defaultCatalog = gConfig.getValue(self.section + "/DefaultCatalog", [])
126
130
  self.masterCatalogOnlyFlag = gConfig.getValue(self.section + "/MasterCatalogOnlyFlag", True)
127
- self.defaultFailoverSE = getDestinationSEList(
128
- gConfig.getValue("/Resources/StorageElementGroups/Tier1-Failover", []), self.siteName
129
- )
131
+ try:
132
+ self.defaultFailoverSE = getDestinationSEList("Tier1-Failover", self.siteName)
133
+ except RuntimeError:
134
+ self.defaultFailoverSE = []
130
135
  self.defaultOutputPath = ""
131
136
  self.retryUpload = gConfig.getValue(self.section + "/RetryUpload", False)
132
137
  self.dm = DataManager()
@@ -338,6 +343,9 @@ class JobWrapper:
338
343
  submissionPolicy = self.ceArgs.get("SubmissionPolicy", gConfig.getValue("/LocalSite/SubmissionPolicy", ""))
339
344
  if submissionPolicy == "Application":
340
345
  configOptions += "-o /LocalSite/RemoteExecution=True "
346
+ # Disable the watchdog CPU wallclock check because the application is running
347
+ # on a remote worker node, so values are not relevant
348
+ (self.jobIDPath / "DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK").touch()
341
349
 
342
350
  command = executable
343
351
  if jobArguments:
@@ -349,7 +357,7 @@ class JobWrapper:
349
357
 
350
358
  def __prepareEnvironment(self):
351
359
  """Prepare the environment to be used by the payload."""
352
- os.environ["DIRACJOBID"] = str(self.jobID)
360
+ os.environ["JOBID"] = str(self.jobID)
353
361
 
354
362
  diracSite = DIRAC.siteName()
355
363
  os.environ["DIRACSITE"] = diracSite
@@ -424,14 +432,14 @@ class JobWrapper:
424
432
  )
425
433
  exeThread.start()
426
434
  payloadPID = None
427
- for seconds in range(5, 40, 5):
435
+ for seconds in CHILD_PID_POLL_INTERVALS:
428
436
  time.sleep(seconds)
429
437
  payloadPID = spObject.getChildPID()
430
438
  if payloadPID:
431
439
  self.__setJobParam("PayloadPID", payloadPID)
432
440
  break
433
441
  if not payloadPID:
434
- return S_ERROR("Payload process could not start after 140 seconds")
442
+ return S_ERROR(f"Payload process could not start after {sum(CHILD_PID_POLL_INTERVALS)} seconds")
435
443
 
436
444
  watchdog = Watchdog(
437
445
  pid=self.currentPID,
@@ -765,6 +773,7 @@ class JobWrapper:
765
773
 
766
774
  configDict = {
767
775
  "JobID": self.jobID,
776
+ "JobIDPath": self.jobIDPath,
768
777
  "LocalSEList": localSEList,
769
778
  "DiskSEList": self.diskSE,
770
779
  "TapeSEList": self.tapeSE,
@@ -1206,8 +1215,8 @@ class JobWrapper:
1206
1215
  lfn = str(basePath / outputPath / os.path.basename(localfile))
1207
1216
  else:
1208
1217
  # if LFN is given, take it as it is
1209
- localfile = str(self.jobIDPath / outputFile.replace("LFN:", ""))
1210
1218
  lfn = outputFile.replace("LFN:", "")
1219
+ localfile = str(self.jobIDPath / os.path.basename(lfn))
1211
1220
 
1212
1221
  return (lfn, localfile)
1213
1222
 
@@ -1583,7 +1592,9 @@ class ExecutionThread(threading.Thread):
1583
1592
  start = time.time()
1584
1593
  initialStat = os.times()
1585
1594
  log.verbose("Cmd called", self.cmd)
1586
- output = self.spObject.systemCall(self.cmd, env=self.exeEnv, callbackFunction=self.sendOutput, shell=True)
1595
+ output = self.spObject.systemCall(
1596
+ self.cmd, env=self.exeEnv, callbackFunction=self.sendOutput, shell=True, start_new_session=True
1597
+ )
1587
1598
  log.verbose(f"Output of system call within execution thread: {output}")
1588
1599
  self.executionResults["Thread"] = output
1589
1600
  timing = time.time() - start
@@ -15,16 +15,6 @@ import sys
15
15
  import json
16
16
  import os
17
17
 
18
- from DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapperUtilities import (
19
- createAndEnterWorkingDirectory,
20
- executePayload,
21
- finalize,
22
- getJobWrapper,
23
- processJobOutputs,
24
- resolveInputData,
25
- transferInputSandbox,
26
- )
27
-
28
18
  sitePython = os.path.realpath("@SITEPYTHON@")
29
19
  if sitePython:
30
20
  sys.path.insert(0, sitePython)
@@ -35,6 +25,15 @@ Script.parseCommandLine()
35
25
 
36
26
  from DIRAC import gLogger
37
27
  from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport
28
+ from DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapperUtilities import (
29
+ createAndEnterWorkingDirectory,
30
+ executePayload,
31
+ finalize,
32
+ getJobWrapper,
33
+ processJobOutputs,
34
+ resolveInputData,
35
+ transferInputSandbox,
36
+ )
38
37
 
39
38
 
40
39
  os.umask(0o22)
@@ -296,6 +296,30 @@ def test_processFailedSubprocess(mocker):
296
296
  assert not result["Value"]["watchdogStats"]
297
297
 
298
298
 
299
+ @pytest.mark.slow
300
+ def test_processKilledSubprocess(mocker):
301
+ """Test the process method of the JobWrapper class: the job is stalled and is killed by the Watchdog."""
302
+ jw = JobWrapper()
303
+ jw.jobArgs = {"CPUTime": 100, "Memory": 1}
304
+
305
+ mocker.patch.object(jw, "_JobWrapper__report")
306
+ mocker.patch.object(jw, "_JobWrapper__setJobParam")
307
+
308
+ mock_progress_call = mocker.patch("DIRAC.WorkloadManagementSystem.JobWrapper.Watchdog.Watchdog._checkProgress")
309
+ mock_progress_call.return_value = S_ERROR("Job is stalled!")
310
+
311
+ with tempfile.NamedTemporaryFile(delete=True) as std_out, tempfile.NamedTemporaryFile(delete=True) as std_err:
312
+ jw.outputFile = std_out.name
313
+ jw.errorFile = std_err.name
314
+ result = jw.process("sleep 20", {})
315
+
316
+ assert result["OK"]
317
+ assert result["Value"]["payloadStatus"] == 15 # SIGTERM
318
+ assert not result["Value"]["payloadOutput"]
319
+ assert not result["Value"]["payloadExecutorError"]
320
+ assert result["Value"]["watchdogError"] == "Job is stalled!" # Error message from the watchdog
321
+
322
+
299
323
  @pytest.mark.slow
300
324
  def test_processQuickExecutionNoWatchdog(mocker):
301
325
  """Test the process method of the JobWrapper class: the payload is too fast to start the watchdog."""
@@ -320,24 +344,40 @@ def test_processQuickExecutionNoWatchdog(mocker):
320
344
 
321
345
 
322
346
  @pytest.mark.slow
323
- def test_processSubprocessFailureNoPid(mocker):
324
- """Test the process method of the JobWrapper class: the subprocess fails and no PID is returned."""
347
+ @pytest.mark.parametrize("expect_failure", [True, False])
348
+ def test_processSubprocessFailureNoPid(mocker, monkeypatch, expect_failure):
349
+ """Test the process method of the JobWrapper class: the subprocess fails and no PID is returned.
350
+
351
+ expect_failure is used to ensure that the JobWrapper is functioning correctly even with the other patching
352
+ that is applied in the test (e.g. CHILD_PID_POLL_INTERVALS).
353
+ """
325
354
  # Test failure in starting the payload process
326
355
  jw = JobWrapper()
327
356
  jw.jobArgs = {}
328
357
 
329
358
  mocker.patch.object(jw, "_JobWrapper__report")
330
359
  mocker.patch.object(jw, "_JobWrapper__setJobParam")
360
+ monkeypatch.setattr(
361
+ "DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapper.CHILD_PID_POLL_INTERVALS", [0.1, 0.2, 0.3, 0.4, 0.5]
362
+ )
363
+
331
364
  mock_exeThread = mocker.Mock()
332
365
  mock_exeThread.start.side_effect = lambda: time.sleep(0.1)
333
- mocker.patch("DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapper.ExecutionThread", return_value=mock_exeThread)
366
+ if expect_failure:
367
+ mocker.patch(
368
+ "DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapper.ExecutionThread", return_value=mock_exeThread
369
+ )
334
370
 
335
371
  with tempfile.NamedTemporaryFile(delete=True) as std_out, tempfile.NamedTemporaryFile(delete=True) as std_err:
336
372
  jw.outputFile = std_out.name
337
373
  jw.errorFile = std_err.name
338
374
  result = jw.process(command="mock_command", env={})
339
- assert not result["OK"]
340
- assert "Payload process could not start after 140 seconds" in result["Message"]
375
+
376
+ if expect_failure:
377
+ assert not result["OK"]
378
+ assert "Payload process could not start after 1.5 seconds" in result["Message"]
379
+ else:
380
+ assert result["OK"]
341
381
 
342
382
 
343
383
  # -------------------------------------------------------------------------------------------------
@@ -624,6 +664,7 @@ def jobIDPath():
624
664
  # Output data files
625
665
  (p / "00232454_00000244_1.sim").touch()
626
666
  (p / "1720442808testFileUpload.txt").touch()
667
+ (p / "testFileUploadFullLFN.txt").touch()
627
668
 
628
669
  with open(p / "pool_xml_catalog.xml", "w") as f:
629
670
  f.write(
@@ -823,7 +864,11 @@ def test_processJobOutputs_output_data_upload(mocker, setup_another_job_wrapper)
823
864
  # BTW, isn't the concept of pool_xml_catalog.xml from lhcbdirac?
824
865
  jw.jobArgs = {
825
866
  "OutputSandbox": [],
826
- "OutputData": ["1720442808testFileUpload.txt", "LFN:00232454_00000244_1.sim"],
867
+ "OutputData": [
868
+ "1720442808testFileUpload.txt",
869
+ "LFN:00232454_00000244_1.sim",
870
+ "LFN:/dirac/user/u/unknown/testFileUploadFullLFN.txt",
871
+ ],
827
872
  "Owner": "Jane Doe",
828
873
  }
829
874
 
@@ -839,10 +884,15 @@ def test_processJobOutputs_output_data_upload(mocker, setup_another_job_wrapper)
839
884
  assert jw.jobReport.jobStatusInfo[1][:-1] == ("", JobMinorStatus.UPLOADING_OUTPUT_DATA)
840
885
  assert jw.jobReport.jobStatusInfo[2][:-1] == (JobStatus.COMPLETING, JobMinorStatus.OUTPUT_DATA_UPLOADED)
841
886
  assert len(jw.jobReport.jobParameters) == 1
842
- assert jw.jobReport.jobParameters[0] == (
843
- "UploadedOutputData",
844
- "00232454_00000244_1.sim, /dirac/user/u/unknown/0/123/1720442808testFileUpload.txt",
845
- )
887
+
888
+ expected_files = {
889
+ "00232454_00000244_1.sim",
890
+ "/dirac/user/u/unknown/0/123/1720442808testFileUpload.txt",
891
+ "/dirac/user/u/unknown/testFileUploadFullLFN.txt",
892
+ }
893
+ assert jw.jobReport.jobParameters[0][0] == "UploadedOutputData"
894
+ uploaded_files = set(jw.jobReport.jobParameters[0][1].split(", "))
895
+ assert uploaded_files == expected_files
846
896
 
847
897
 
848
898
  # -------------------------------------------------------------------------------------------------
@@ -72,6 +72,7 @@ def extraOptions():
72
72
  os.remove(extraOptions)
73
73
 
74
74
 
75
+ @pytest.mark.slow
75
76
  def test_createAndExecuteJobWrapperTemplate_success(extraOptions):
76
77
  """Test the creation of a classical job wrapper and its execution:
77
78
  There is an extra option cfg file to be passed to the job wrapper.
@@ -144,6 +145,7 @@ def test_createAndExecuteJobWrapperTemplate_success(extraOptions):
144
145
  shutil.rmtree(os.path.join(os.getcwd(), "job"))
145
146
 
146
147
 
148
+ @pytest.mark.slow
147
149
  def test_createAndExecuteJobWrapperTemplate_missingExtraOptions():
148
150
  """Test the creation of a classical job wrapper and its execution:
149
151
  There is no extra options to be passed to the job wrapper.
@@ -205,6 +207,7 @@ def test_createAndExecuteJobWrapperTemplate_missingExtraOptions():
205
207
  shutil.rmtree(os.path.join(os.getcwd(), "job"))
206
208
 
207
209
 
210
+ @pytest.mark.slow
208
211
  def test_createAndExecuteRelocatedJobWrapperTemplate_success(extraOptions):
209
212
  """Test the creation of a relocated job wrapper and its execution:
210
213
  This is generally used when containers are involved (SingularityCE).
@@ -325,6 +328,7 @@ def test_createAndExecuteRelocatedJobWrapperTemplate_success(extraOptions):
325
328
  shutil.rmtree(wrapperPath)
326
329
 
327
330
 
331
+ @pytest.mark.slow
328
332
  def test_createAndExecuteJobWrapperOfflineTemplate_success(extraOptions):
329
333
  """Test the creation of an offline job wrapper and its execution:
330
334
  This is generally used when pre/post processing operations are executed locally,
@@ -21,9 +21,7 @@ from DIRAC.Core.Utilities.JDL import jdlToBaseJobDescriptionModel
21
21
  from DIRAC.Core.Utilities.JEncode import strToIntDict
22
22
  from DIRAC.Core.Utilities.ObjectLoader import ObjectLoader
23
23
  from DIRAC.FrameworkSystem.Client.ProxyManagerClient import gProxyManager
24
- from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient
25
24
  from DIRAC.WorkloadManagementSystem.Client import JobStatus
26
- from DIRAC.WorkloadManagementSystem.Client.JobStatus import filterJobStateTransition
27
25
  from DIRAC.WorkloadManagementSystem.Service.JobPolicy import (
28
26
  RIGHT_DELETE,
29
27
  RIGHT_KILL,
@@ -32,8 +30,10 @@ from DIRAC.WorkloadManagementSystem.Service.JobPolicy import (
32
30
  RIGHT_SUBMIT,
33
31
  JobPolicy,
34
32
  )
33
+ from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
35
34
  from DIRAC.WorkloadManagementSystem.Utilities.JobModel import JobDescriptionModel
36
35
  from DIRAC.WorkloadManagementSystem.Utilities.ParametricJob import generateParametricJobs, getParameterVectorLength
36
+ from DIRAC.WorkloadManagementSystem.Utilities.Utils import rescheduleJobs
37
37
 
38
38
  MAX_PARAMETRIC_JOBS = 20
39
39
 
@@ -104,16 +104,6 @@ class JobManagerHandlerMixin:
104
104
  return
105
105
  self.log.info("Optimize msg sent", f"for {len(jids)} jobs")
106
106
 
107
- ###########################################################################
108
- types_getMaxParametricJobs = []
109
-
110
- def export_getMaxParametricJobs(self):
111
- """Get the maximum number of parametric jobs
112
-
113
- :return: S_OK()/S_ERROR()
114
- """
115
- return S_OK(self.maxParametricJobs)
116
-
117
107
  types_submitJob = [str]
118
108
 
119
109
  def export_submitJob(self, jobDesc):
@@ -345,8 +335,7 @@ class JobManagerHandlerMixin:
345
335
  types_rescheduleJob = []
346
336
 
347
337
  def export_rescheduleJob(self, jobIDs):
348
- """Reschedule a single job. If the optional proxy parameter is given
349
- it will be used to refresh the proxy in the Proxy Repository
338
+ """Reschedule a list of jobs.
350
339
 
351
340
  :param list jobIDs: list of job IDs
352
341
 
@@ -360,22 +349,12 @@ class JobManagerHandlerMixin:
360
349
  validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
361
350
  jobList, RIGHT_RESCHEDULE
362
351
  )
363
- for jobID in validJobList:
364
- self.taskQueueDB.deleteJob(jobID)
365
- result = self.jobDB.rescheduleJob(jobID)
366
- self.log.debug(str(result))
367
- if not result["OK"]:
368
- return result
369
- self.jobLoggingDB.addLoggingRecord(
370
- result["JobID"],
371
- status=result["Status"],
372
- minorStatus=result["MinorStatus"],
373
- applicationStatus="Unknown",
374
- source="JobManager",
375
- )
352
+ res = rescheduleJobs(validJobList, source="JobManager")
353
+ if not res["OK"]:
354
+ self.log.error(res["Message"])
376
355
 
377
356
  if invalidJobList or nonauthJobList:
378
- result = S_ERROR("Some jobs failed reschedule")
357
+ result = S_ERROR("Some jobs can not be rescheduled")
379
358
  if invalidJobList:
380
359
  result["InvalidJobIDs"] = invalidJobList
381
360
  if nonauthJobList:
@@ -450,131 +429,28 @@ class JobManagerHandlerMixin:
450
429
 
451
430
  return S_OK(validJobList)
452
431
 
453
- def __deleteJob(self, jobID, force=False):
454
- """Set the job status to "Deleted"
455
- and remove the pilot that ran and its logging info if the pilot is finished.
456
-
457
- :param int jobID: job ID
458
- :return: S_OK()/S_ERROR()
459
- """
460
- if not (result := self.jobDB.setJobStatus(jobID, JobStatus.DELETED, "Checking accounting", force=force))["OK"]:
461
- return result
462
-
463
- if not (result := self.taskQueueDB.deleteJob(jobID))["OK"]:
464
- self.log.warn("Failed to delete job from the TaskQueue")
465
-
466
- # if it was the last job for the pilot
467
- result = self.pilotAgentsDB.getPilotsForJobID(jobID)
468
- if not result["OK"]:
469
- self.log.error("Failed to get Pilots for JobID", result["Message"])
470
- return result
471
- for pilot in result["Value"]:
472
- res = self.pilotAgentsDB.getJobsForPilot(pilot)
473
- if not res["OK"]:
474
- self.log.error("Failed to get jobs for pilot", res["Message"])
475
- return res
476
- if not res["Value"]: # if list of jobs for pilot is empty, delete pilot
477
- result = self.pilotAgentsDB.getPilotInfo(pilotID=pilot)
478
- if not result["OK"]:
479
- self.log.error("Failed to get pilot info", result["Message"])
480
- return result
481
- ret = self.pilotAgentsDB.deletePilot(result["Value"]["PilotJobReference"])
482
- if not ret["OK"]:
483
- self.log.error("Failed to delete pilot from PilotAgentsDB", ret["Message"])
484
- return ret
485
-
486
- return S_OK()
432
+ ###########################################################################
433
+ types_deleteJob = []
487
434
 
488
- def __killJob(self, jobID, sendKillCommand=True, force=False):
489
- """Kill one job
435
+ def export_deleteJob(self, jobIDs, force=False):
436
+ """Delete jobs specified in the jobIDs list
490
437
 
491
- :param int jobID: job ID
492
- :param bool sendKillCommand: send kill command
438
+ :param list jobIDs: list of job IDs
493
439
 
494
- :return: S_OK()/S_ERROR()
440
+ :return: S_OK/S_ERROR
495
441
  """
496
- if sendKillCommand:
497
- if not (result := self.jobDB.setJobCommand(jobID, "Kill"))["OK"]:
498
- return result
499
-
500
- self.log.info("Job marked for termination", jobID)
501
- if not (result := self.jobDB.setJobStatus(jobID, JobStatus.KILLED, "Marked for termination", force=force))[
502
- "OK"
503
- ]:
504
- self.log.warn("Failed to set job Killed status", result["Message"])
505
- if not (result := self.taskQueueDB.deleteJob(jobID))["OK"]:
506
- self.log.warn("Failed to delete job from the TaskQueue", result["Message"])
507
-
508
- return S_OK()
509
-
510
- def _kill_delete_jobs(self, jobIDList, right, force=False):
511
- """Kill (== set the status to "KILLED") or delete (== set the status to "DELETED") jobs as necessary
512
-
513
- :param list jobIDList: job IDs
514
- :param str right: RIGHT_KILL or RIGHT_DELETE
515
442
 
516
- :return: S_OK()/S_ERROR()
517
- """
518
- jobList = self.__getJobList(jobIDList)
443
+ jobList = self.__getJobList(jobIDs)
519
444
  if not jobList:
520
445
  self.log.warn("No jobs specified")
521
446
  return S_OK([])
522
447
 
523
- validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)
524
-
525
- badIDs = []
526
-
527
- killJobList = []
528
- deleteJobList = []
529
- if validJobList:
530
- # Get the jobs allowed to transition to the Killed state
531
- filterRes = filterJobStateTransition(validJobList, JobStatus.KILLED)
532
- if not filterRes["OK"]:
533
- return filterRes
534
- killJobList.extend(filterRes["Value"])
535
-
536
- if not right == RIGHT_KILL:
537
- # Get the jobs allowed to transition to the Deleted state
538
- filterRes = filterJobStateTransition(validJobList, JobStatus.DELETED)
539
- if not filterRes["OK"]:
540
- return filterRes
541
- deleteJobList.extend(filterRes["Value"])
542
-
543
- # Look for jobs that are in the Staging state to send kill signal to the stager
544
- result = self.jobDB.getJobsAttributes(killJobList, ["Status"])
545
- if not result["OK"]:
546
- return result
547
- stagingJobList = [jobID for jobID, sDict in result["Value"].items() if sDict["Status"] == JobStatus.STAGING]
548
-
549
- for jobID in killJobList:
550
- result = self.__killJob(jobID, force=force)
551
- if not result["OK"]:
552
- badIDs.append(jobID)
553
-
554
- for jobID in deleteJobList:
555
- result = self.__deleteJob(jobID, force=force)
556
- if not result["OK"]:
557
- badIDs.append(jobID)
558
-
559
- if stagingJobList:
560
- stagerClient = StorageManagerClient()
561
- self.log.info("Going to send killing signal to stager as well!")
562
- result = stagerClient.killTasksBySourceTaskID(stagingJobList)
563
- if not result["OK"]:
564
- self.log.warn("Failed to kill some Stager tasks", result["Message"])
448
+ validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
449
+ jobList, RIGHT_DELETE
450
+ )
565
451
 
566
- if nonauthJobList or badIDs:
567
- result = S_ERROR("Some jobs failed deletion")
568
- if nonauthJobList:
569
- self.log.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
570
- result["NonauthorizedJobIDs"] = nonauthJobList
571
- if badIDs:
572
- self.log.warn("JobIDs failed to be deleted", str(badIDs))
573
- result["FailedJobIDs"] = badIDs
574
- return result
452
+ result = kill_delete_jobs(RIGHT_DELETE, validJobList, nonauthJobList, force=force)
575
453
 
576
- jobsList = killJobList if right == RIGHT_KILL else deleteJobList
577
- result = S_OK(jobsList)
578
454
  result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
579
455
 
580
456
  if invalidJobList:
@@ -583,30 +459,33 @@ class JobManagerHandlerMixin:
583
459
  return result
584
460
 
585
461
  ###########################################################################
586
- types_deleteJob = []
462
+ types_killJob = []
587
463
 
588
- def export_deleteJob(self, jobIDs, force=False):
589
- """Delete jobs specified in the jobIDs list
464
+ def export_killJob(self, jobIDs, force=False):
465
+ """Kill jobs specified in the jobIDs list
590
466
 
591
467
  :param list jobIDs: list of job IDs
592
468
 
593
469
  :return: S_OK/S_ERROR
594
470
  """
595
471
 
596
- return self._kill_delete_jobs(jobIDs, RIGHT_DELETE, force=force)
472
+ jobList = self.__getJobList(jobIDs)
473
+ if not jobList:
474
+ self.log.warn("No jobs specified")
475
+ return S_OK([])
597
476
 
598
- ###########################################################################
599
- types_killJob = []
477
+ validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
478
+ jobList, RIGHT_KILL
479
+ )
600
480
 
601
- def export_killJob(self, jobIDs, force=False):
602
- """Kill jobs specified in the jobIDs list
481
+ result = kill_delete_jobs(RIGHT_KILL, validJobList, nonauthJobList, force=force)
603
482
 
604
- :param list jobIDs: list of job IDs
483
+ result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
605
484
 
606
- :return: S_OK/S_ERROR
607
- """
485
+ if invalidJobList:
486
+ result["InvalidJobIDs"] = invalidJobList
608
487
 
609
- return self._kill_delete_jobs(jobIDs, RIGHT_KILL, force=force)
488
+ return result
610
489
 
611
490
  ###########################################################################
612
491
  types_resetJob = []