DIRAC 9.0.0a42__py3-none-any.whl → 9.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. DIRAC/AccountingSystem/Client/AccountingCLI.py +0 -140
  2. DIRAC/AccountingSystem/Client/DataStoreClient.py +0 -13
  3. DIRAC/AccountingSystem/Client/Types/BaseAccountingType.py +0 -7
  4. DIRAC/AccountingSystem/ConfigTemplate.cfg +0 -5
  5. DIRAC/AccountingSystem/Service/DataStoreHandler.py +0 -72
  6. DIRAC/ConfigurationSystem/Client/Helpers/CSGlobals.py +0 -9
  7. DIRAC/ConfigurationSystem/Client/Helpers/Registry.py +38 -26
  8. DIRAC/ConfigurationSystem/Client/Helpers/Resources.py +11 -43
  9. DIRAC/ConfigurationSystem/Client/Helpers/test/Test_Helpers.py +0 -16
  10. DIRAC/ConfigurationSystem/Client/LocalConfiguration.py +14 -8
  11. DIRAC/ConfigurationSystem/Client/PathFinder.py +47 -8
  12. DIRAC/ConfigurationSystem/Client/SyncPlugins/CERNLDAPSyncPlugin.py +4 -1
  13. DIRAC/ConfigurationSystem/Client/VOMS2CSSynchronizer.py +32 -19
  14. DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py +41 -1
  15. DIRAC/ConfigurationSystem/private/RefresherBase.py +4 -2
  16. DIRAC/Core/Base/API.py +4 -7
  17. DIRAC/Core/Base/SQLAlchemyDB.py +1 -0
  18. DIRAC/Core/DISET/ServiceReactor.py +11 -3
  19. DIRAC/Core/DISET/private/BaseClient.py +1 -2
  20. DIRAC/Core/DISET/private/Transports/M2SSLTransport.py +9 -7
  21. DIRAC/Core/DISET/private/Transports/SSL/M2Utils.py +3 -1
  22. DIRAC/Core/LCG/GOCDBClient.py +5 -7
  23. DIRAC/Core/Security/DiracX.py +31 -17
  24. DIRAC/Core/Security/IAMService.py +5 -10
  25. DIRAC/Core/Security/Locations.py +27 -18
  26. DIRAC/Core/Security/ProxyInfo.py +9 -5
  27. DIRAC/Core/Security/VOMSService.py +2 -4
  28. DIRAC/Core/Security/m2crypto/X509Certificate.py +4 -6
  29. DIRAC/Core/Security/m2crypto/asn1_utils.py +17 -5
  30. DIRAC/Core/Security/test/test_diracx_token_from_pem.py +161 -0
  31. DIRAC/Core/Tornado/Client/ClientSelector.py +4 -1
  32. DIRAC/Core/Tornado/Server/TornadoService.py +1 -1
  33. DIRAC/Core/Utilities/CGroups2.py +328 -0
  34. DIRAC/Core/Utilities/ClassAd/ClassAdLight.py +4 -290
  35. DIRAC/Core/Utilities/DErrno.py +5 -309
  36. DIRAC/Core/Utilities/Extensions.py +10 -1
  37. DIRAC/Core/Utilities/File.py +1 -1
  38. DIRAC/Core/Utilities/Graphs/GraphData.py +1 -1
  39. DIRAC/Core/Utilities/Graphs/GraphUtilities.py +6 -1
  40. DIRAC/Core/Utilities/JDL.py +1 -195
  41. DIRAC/Core/Utilities/List.py +1 -124
  42. DIRAC/Core/Utilities/MySQL.py +103 -99
  43. DIRAC/Core/Utilities/Os.py +32 -1
  44. DIRAC/Core/Utilities/Platform.py +2 -107
  45. DIRAC/Core/Utilities/Proxy.py +0 -4
  46. DIRAC/Core/Utilities/ReturnValues.py +7 -252
  47. DIRAC/Core/Utilities/StateMachine.py +12 -178
  48. DIRAC/Core/Utilities/Subprocess.py +35 -14
  49. DIRAC/Core/Utilities/TimeUtilities.py +10 -253
  50. DIRAC/Core/Utilities/test/Test_JDL.py +0 -3
  51. DIRAC/Core/Utilities/test/Test_Profiler.py +20 -20
  52. DIRAC/Core/scripts/dirac_agent.py +1 -1
  53. DIRAC/Core/scripts/dirac_apptainer_exec.py +72 -46
  54. DIRAC/Core/scripts/dirac_configure.py +1 -3
  55. DIRAC/Core/scripts/dirac_install_db.py +24 -6
  56. DIRAC/Core/scripts/dirac_platform.py +1 -92
  57. DIRAC/DataManagementSystem/Agent/FTS3Agent.py +8 -7
  58. DIRAC/DataManagementSystem/Agent/RequestOperations/RemoveFile.py +7 -6
  59. DIRAC/DataManagementSystem/Client/FTS3Job.py +71 -34
  60. DIRAC/DataManagementSystem/DB/FTS3DB.py +7 -3
  61. DIRAC/DataManagementSystem/DB/FileCatalogComponents/DatasetManager/DatasetManager.py +1 -1
  62. DIRAC/DataManagementSystem/DB/FileCatalogDB.sql +9 -9
  63. DIRAC/DataManagementSystem/DB/FileCatalogWithFkAndPsDB.sql +9 -9
  64. DIRAC/DataManagementSystem/Utilities/DMSHelpers.py +6 -2
  65. DIRAC/DataManagementSystem/scripts/dirac_admin_allow_se.py +13 -8
  66. DIRAC/DataManagementSystem/scripts/dirac_admin_ban_se.py +13 -8
  67. DIRAC/DataManagementSystem/scripts/dirac_dms_create_moving_request.py +2 -0
  68. DIRAC/DataManagementSystem/scripts/dirac_dms_protocol_matrix.py +0 -1
  69. DIRAC/FrameworkSystem/Client/BundleDeliveryClient.py +2 -7
  70. DIRAC/FrameworkSystem/Client/ComponentInstaller.py +9 -4
  71. DIRAC/FrameworkSystem/Client/ProxyManagerClient.py +5 -2
  72. DIRAC/FrameworkSystem/Client/SystemAdministratorClientCLI.py +11 -6
  73. DIRAC/FrameworkSystem/ConfigTemplate.cfg +2 -0
  74. DIRAC/FrameworkSystem/DB/AuthDB.py +3 -3
  75. DIRAC/FrameworkSystem/DB/InstalledComponentsDB.py +4 -4
  76. DIRAC/FrameworkSystem/DB/ProxyDB.py +11 -3
  77. DIRAC/FrameworkSystem/DB/TokenDB.py +1 -1
  78. DIRAC/FrameworkSystem/Service/ProxyManagerHandler.py +8 -6
  79. DIRAC/FrameworkSystem/Utilities/MonitoringUtilities.py +2 -19
  80. DIRAC/FrameworkSystem/Utilities/TokenManagementUtilities.py +3 -2
  81. DIRAC/FrameworkSystem/Utilities/diracx.py +36 -14
  82. DIRAC/FrameworkSystem/private/authorization/AuthServer.py +2 -2
  83. DIRAC/FrameworkSystem/scripts/dirac_admin_update_pilot.py +18 -11
  84. DIRAC/FrameworkSystem/scripts/dirac_login.py +2 -2
  85. DIRAC/FrameworkSystem/scripts/dirac_proxy_init.py +7 -8
  86. DIRAC/Interfaces/API/Dirac.py +27 -15
  87. DIRAC/Interfaces/API/DiracAdmin.py +45 -17
  88. DIRAC/Interfaces/API/Job.py +9 -13
  89. DIRAC/Interfaces/scripts/dirac_admin_allow_site.py +12 -18
  90. DIRAC/Interfaces/scripts/dirac_admin_ban_site.py +12 -10
  91. DIRAC/Interfaces/scripts/dirac_admin_get_site_mask.py +4 -13
  92. DIRAC/Interfaces/scripts/dirac_admin_reset_job.py +3 -6
  93. DIRAC/Interfaces/scripts/dirac_wms_job_parameters.py +0 -1
  94. DIRAC/MonitoringSystem/Client/Types/WMSHistory.py +4 -0
  95. DIRAC/MonitoringSystem/Client/WebAppClient.py +26 -0
  96. DIRAC/MonitoringSystem/ConfigTemplate.cfg +9 -0
  97. DIRAC/MonitoringSystem/DB/MonitoringDB.py +6 -25
  98. DIRAC/MonitoringSystem/Service/MonitoringHandler.py +0 -33
  99. DIRAC/MonitoringSystem/Service/WebAppHandler.py +599 -0
  100. DIRAC/MonitoringSystem/private/MainReporter.py +0 -3
  101. DIRAC/ProductionSystem/DB/ProductionDB.sql +4 -4
  102. DIRAC/ProductionSystem/scripts/dirac_prod_get.py +2 -2
  103. DIRAC/ProductionSystem/scripts/dirac_prod_get_all.py +2 -2
  104. DIRAC/ProductionSystem/scripts/dirac_prod_get_trans.py +2 -3
  105. DIRAC/RequestManagementSystem/Agent/RequestExecutingAgent.py +8 -6
  106. DIRAC/RequestManagementSystem/Agent/RequestOperations/ForwardDISET.py +2 -14
  107. DIRAC/RequestManagementSystem/Client/ReqClient.py +66 -13
  108. DIRAC/RequestManagementSystem/ConfigTemplate.cfg +6 -6
  109. DIRAC/RequestManagementSystem/DB/RequestDB.py +10 -5
  110. DIRAC/RequestManagementSystem/DB/test/RMSTestScenari.py +2 -0
  111. DIRAC/RequestManagementSystem/private/RequestValidator.py +40 -46
  112. DIRAC/ResourceStatusSystem/Client/SiteStatus.py +4 -2
  113. DIRAC/ResourceStatusSystem/Command/FreeDiskSpaceCommand.py +3 -1
  114. DIRAC/ResourceStatusSystem/DB/ResourceManagementDB.py +8 -8
  115. DIRAC/ResourceStatusSystem/DB/ResourceStatusDB.py +2 -2
  116. DIRAC/ResourceStatusSystem/Utilities/CSHelpers.py +2 -31
  117. DIRAC/ResourceStatusSystem/scripts/dirac_rss_set_status.py +30 -12
  118. DIRAC/Resources/Catalog/RucioFileCatalogClient.py +195 -1
  119. DIRAC/Resources/Catalog/test/Test_RucioFileCatalogClient.py +181 -0
  120. DIRAC/Resources/Computing/AREXComputingElement.py +25 -8
  121. DIRAC/Resources/Computing/BatchSystems/Condor.py +126 -108
  122. DIRAC/Resources/Computing/BatchSystems/SLURM.py +5 -1
  123. DIRAC/Resources/Computing/BatchSystems/test/Test_SLURM.py +46 -0
  124. DIRAC/Resources/Computing/ComputingElement.py +1 -1
  125. DIRAC/Resources/Computing/HTCondorCEComputingElement.py +44 -44
  126. DIRAC/Resources/Computing/InProcessComputingElement.py +4 -2
  127. DIRAC/Resources/Computing/LocalComputingElement.py +1 -18
  128. DIRAC/Resources/Computing/SSHBatchComputingElement.py +1 -17
  129. DIRAC/Resources/Computing/SSHComputingElement.py +1 -18
  130. DIRAC/Resources/Computing/SingularityComputingElement.py +19 -5
  131. DIRAC/Resources/Computing/test/Test_HTCondorCEComputingElement.py +67 -49
  132. DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +2 -1
  133. DIRAC/Resources/IdProvider/CheckInIdProvider.py +13 -0
  134. DIRAC/Resources/IdProvider/IdProviderFactory.py +11 -3
  135. DIRAC/Resources/MessageQueue/StompMQConnector.py +1 -1
  136. DIRAC/Resources/Storage/GFAL2_StorageBase.py +24 -15
  137. DIRAC/Resources/Storage/OccupancyPlugins/WLCGAccountingHTTPJson.py +1 -3
  138. DIRAC/Resources/Storage/StorageBase.py +4 -2
  139. DIRAC/Resources/Storage/StorageElement.py +6 -7
  140. DIRAC/StorageManagementSystem/DB/StorageManagementDB.sql +2 -2
  141. DIRAC/TransformationSystem/Agent/TaskManagerAgentBase.py +10 -16
  142. DIRAC/TransformationSystem/Agent/TransformationAgent.py +22 -1
  143. DIRAC/TransformationSystem/Agent/TransformationCleaningAgent.py +16 -16
  144. DIRAC/TransformationSystem/Client/TaskManager.py +2 -4
  145. DIRAC/TransformationSystem/Client/Transformation.py +6 -7
  146. DIRAC/TransformationSystem/Client/TransformationClient.py +21 -11
  147. DIRAC/TransformationSystem/Client/Utilities.py +9 -0
  148. DIRAC/TransformationSystem/DB/TransformationDB.py +11 -14
  149. DIRAC/TransformationSystem/DB/TransformationDB.sql +9 -9
  150. DIRAC/TransformationSystem/Service/TransformationManagerHandler.py +0 -333
  151. DIRAC/TransformationSystem/Utilities/ReplicationCLIParameters.py +3 -3
  152. DIRAC/TransformationSystem/Utilities/TransformationInfo.py +7 -5
  153. DIRAC/TransformationSystem/scripts/dirac_production_runjoblocal.py +2 -4
  154. DIRAC/TransformationSystem/test/Test_TransformationInfo.py +22 -15
  155. DIRAC/TransformationSystem/test/Test_replicationTransformation.py +5 -6
  156. DIRAC/Workflow/Modules/test/Test_Modules.py +5 -0
  157. DIRAC/WorkloadManagementSystem/Agent/JobAgent.py +38 -26
  158. DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py +12 -8
  159. DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py +4 -3
  160. DIRAC/WorkloadManagementSystem/Agent/PushJobAgent.py +13 -13
  161. DIRAC/WorkloadManagementSystem/Agent/SiteDirector.py +18 -14
  162. DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py +18 -51
  163. DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py +41 -1
  164. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py +45 -4
  165. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobCleaningAgent.py +7 -9
  166. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py +1 -0
  167. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_SiteDirector.py +9 -2
  168. DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py +4 -5
  169. DIRAC/WorkloadManagementSystem/Client/DownloadInputData.py +9 -9
  170. DIRAC/WorkloadManagementSystem/Client/InputDataResolution.py +6 -6
  171. DIRAC/WorkloadManagementSystem/Client/JobMonitoringClient.py +10 -11
  172. DIRAC/WorkloadManagementSystem/Client/JobReport.py +1 -1
  173. DIRAC/WorkloadManagementSystem/Client/JobState/CachedJobState.py +3 -0
  174. DIRAC/WorkloadManagementSystem/Client/JobState/JobManifest.py +32 -261
  175. DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py +6 -0
  176. DIRAC/WorkloadManagementSystem/Client/JobStateUpdateClient.py +3 -0
  177. DIRAC/WorkloadManagementSystem/Client/JobStatus.py +8 -152
  178. DIRAC/WorkloadManagementSystem/Client/PoolXMLSlice.py +12 -19
  179. DIRAC/WorkloadManagementSystem/Client/SandboxStoreClient.py +25 -38
  180. DIRAC/WorkloadManagementSystem/Client/WMSClient.py +2 -3
  181. DIRAC/WorkloadManagementSystem/Client/test/Test_Client_DownloadInputData.py +29 -0
  182. DIRAC/WorkloadManagementSystem/ConfigTemplate.cfg +4 -8
  183. DIRAC/WorkloadManagementSystem/DB/JobDB.py +89 -132
  184. DIRAC/WorkloadManagementSystem/DB/JobDB.sql +8 -8
  185. DIRAC/WorkloadManagementSystem/DB/JobDBUtils.py +18 -147
  186. DIRAC/WorkloadManagementSystem/DB/JobLoggingDB.py +19 -6
  187. DIRAC/WorkloadManagementSystem/DB/JobParametersDB.py +9 -9
  188. DIRAC/WorkloadManagementSystem/DB/PilotAgentsDB.py +16 -5
  189. DIRAC/WorkloadManagementSystem/DB/PilotAgentsDB.sql +3 -3
  190. DIRAC/WorkloadManagementSystem/DB/SandboxMetadataDB.py +44 -82
  191. DIRAC/WorkloadManagementSystem/DB/StatusUtils.py +125 -0
  192. DIRAC/WorkloadManagementSystem/DB/tests/Test_JobDB.py +1 -1
  193. DIRAC/WorkloadManagementSystem/DB/tests/Test_StatusUtils.py +28 -0
  194. DIRAC/WorkloadManagementSystem/Executor/JobSanity.py +5 -4
  195. DIRAC/WorkloadManagementSystem/Executor/JobScheduling.py +4 -0
  196. DIRAC/WorkloadManagementSystem/FutureClient/JobStateUpdateClient.py +75 -33
  197. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +22 -11
  198. DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py +9 -10
  199. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +60 -10
  200. DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapperTemplate.py +4 -0
  201. DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py +33 -154
  202. DIRAC/WorkloadManagementSystem/Service/JobMonitoringHandler.py +5 -323
  203. DIRAC/WorkloadManagementSystem/Service/JobStateUpdateHandler.py +0 -16
  204. DIRAC/WorkloadManagementSystem/Service/PilotManagerHandler.py +6 -103
  205. DIRAC/WorkloadManagementSystem/Service/SandboxStoreHandler.py +7 -53
  206. DIRAC/WorkloadManagementSystem/Service/WMSAdministratorHandler.py +16 -79
  207. DIRAC/WorkloadManagementSystem/Service/WMSUtilities.py +4 -18
  208. DIRAC/WorkloadManagementSystem/Utilities/JobModel.py +28 -209
  209. DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +65 -3
  210. DIRAC/WorkloadManagementSystem/Utilities/JobStatusUtility.py +2 -64
  211. DIRAC/WorkloadManagementSystem/Utilities/ParametricJob.py +7 -171
  212. DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py +73 -7
  213. DIRAC/WorkloadManagementSystem/Utilities/PilotWrapper.py +41 -11
  214. DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +16 -0
  215. DIRAC/WorkloadManagementSystem/Utilities/Utils.py +36 -1
  216. DIRAC/WorkloadManagementSystem/Utilities/jobAdministration.py +15 -0
  217. DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobModel.py +1 -15
  218. DIRAC/WorkloadManagementSystem/Utilities/test/Test_ParametricJob.py +45 -128
  219. DIRAC/WorkloadManagementSystem/Utilities/test/Test_PilotWrapper.py +16 -0
  220. DIRAC/WorkloadManagementSystem/scripts/dirac_jobexec.py +7 -2
  221. DIRAC/WorkloadManagementSystem/scripts/dirac_wms_pilot_job_info.py +1 -1
  222. DIRAC/__init__.py +62 -60
  223. DIRAC/tests/Utilities/testJobDefinitions.py +22 -28
  224. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/METADATA +8 -5
  225. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/RECORD +229 -228
  226. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/WHEEL +1 -1
  227. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/entry_points.txt +0 -3
  228. DIRAC/Core/Utilities/test/Test_List.py +0 -150
  229. DIRAC/Core/Utilities/test/Test_Time.py +0 -88
  230. DIRAC/Resources/Computing/PilotBundle.py +0 -70
  231. DIRAC/TransformationSystem/scripts/dirac_transformation_archive.py +0 -30
  232. DIRAC/TransformationSystem/scripts/dirac_transformation_clean.py +0 -30
  233. DIRAC/TransformationSystem/scripts/dirac_transformation_remove_output.py +0 -30
  234. DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobManager.py +0 -58
  235. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info/licenses}/LICENSE +0 -0
  236. {DIRAC-9.0.0a42.dist-info → dirac-9.0.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,328 @@
1
+ #!/usr/bin/env python3
2
+ """cgroup2 support for DIRAC pilot."""
3
+
4
+ import os
5
+ import functools
6
+ import subprocess
7
+ from DIRAC import S_OK, S_ERROR, gLogger
8
+ from DIRAC.Core.Utilities.DIRACSingleton import DIRACSingleton
9
+ from DIRAC.Core.Utilities import Subprocess
10
+
11
+
12
class CG2Manager(metaclass=DIRACSingleton):
    """A class to manage the cgroup2 hierarchy for a typical pilot job use-case.

    This creates a group for all of the pilot processes (anything in the
    group at the start). This is a requirement for controlling the
    sub-groups (no processes in non-leaf groups).

    A group is then created on request for each "slot" under the pilot,
    with the requested limits.
    """

    # Paths used to lookup cgroup info
    FILE_MOUNTS = "/proc/mounts"
    # NOTE: the PID is captured once, when the class body is executed.
    FILE_CUR_CGROUP = f"/proc/{os.getpid()}/cgroup"
    # Control file names within the cgroup2 hierarchy
    CTRL_CONTROLLERS = "cgroup.controllers"
    CTRL_PROCS = "cgroup.procs"
    CTRL_SUBTREE = "cgroup.subtree_control"
    CTRL_MEM_OOM_GROUP = "memory.oom.group"
    CTRL_MEM_EVENTS = "memory.events"
    CTRL_MEM_MAX = "memory.max"
    CTRL_MEM_SWAP_MAX = "memory.swap.max"
    CTRL_MEM_PEAK = "memory.peak"
    CTRL_CPU_MAX = "cpu.max"
    # CPU controller constants
    # Weight is the max value for 1 CPU core
    CPU_WEIGHT = 100000
    # Period is the averaging time in us to apply the limit
    # The default is 100k and there is no particular reason this should change
    CPU_PERIOD = 100000
    # Name of the group for the existing pilot processes
    # NOTE: like FILE_CUR_CGROUP, the PID is captured when the class body is executed.
    PILOT_GROUP = f"dirac_pilot_{os.getpid()}"

    def __init__(self):
        """Set-up CGroup2 manager."""
        # This boolean will be set to True if the cgroups are configured
        # in the expected way
        self._ready = False
        # A counter of number of subgroups created
        # Used to create unique group names
        self._subproc_num = 0
        # Physical path to the starting cgroup for this process
        # (i.e. the base of our hierarchy)
        self._cgroup_path = None
        # Logger
        self.log = gLogger.getSubLogger("CG2Manager")

    @staticmethod
    def _filter_file(path, filterfcn):
        """Opens a file and runs filterfcn on each (stripped) line.

        The first *truthy* value returned by filterfcn is returned by this
        function. Falsy results (None, False, 0, "") are treated as
        "no match" and scanning continues.
        Returns None if no line matches.
        """
        with open(path, encoding="ascii") as file_in:
            for line in file_in:
                if res := filterfcn(line.strip()):
                    return res
        return None

    def _detect_root(self):
        """Find the cgroup2 filesystem mountpoint on this system.

        Returns the mountpoint path or None if it isn't found.
        """

        def filt(line):
            """Filter function to find the first cgroup2 mount point
            from a standard /proc/mounts layout file.
            """
            parts = line.split(" ")
            # Field 1 is the mount point, field 2 the filesystem type
            if len(parts) >= 3 and parts[2] == "cgroup2":
                return parts[1]
            return None

        return self._filter_file(self.FILE_MOUNTS, filt)

    def _detect_path(self):
        """Finds the full physical path to the current cgroup control dir.

        Sets self._cgroup_path on success.
        Raises a RuntimeError if the path cannot be determined. Note that an
        empty group name (a "0::/" entry) is also reported as not found,
        because _filter_file skips falsy results.
        """

        def filt(line):
            """Filter to find the current cgroup2 name for the current
            process, without the leading /.
            """
            if line.startswith("0::/"):
                return line[4:]
            return False

        if not (root_path := self._detect_root()):
            raise RuntimeError("Failed to find cgroup mount point")
        if not (cur_group := self._filter_file(self.FILE_CUR_CGROUP, filt)):
            raise RuntimeError("Failed to find current cgroup")
        self._cgroup_path = os.path.join(root_path, cur_group)

    def _create_group(self, group_name, isolate_oom=True):
        """Creates a new group.

        If "isolate_oom" is True, the new group will be decoupled
        from the parent's OOM group.
        Raises a RuntimeError if the group cannot be created.
        """
        try:
            os.mkdir(os.path.join(self._cgroup_path, group_name))
        except PermissionError as err:
            raise RuntimeError(f"Permission denied creating sub-cgroup '{group_name}'") from err
        except OSError as err:
            # e.g. the group already exists or the parent path has vanished;
            # reported as RuntimeError like every other cgroup failure.
            raise RuntimeError(f"Failed to create sub-cgroup '{group_name}': {err}") from err
        if isolate_oom:
            self._write_control(group_name, self.CTRL_MEM_OOM_GROUP, "0")

    def _remove_group(self, group_name):
        """Removes a group.

        Any OSError from os.rmdir is propagated to the caller.
        """
        os.rmdir(os.path.join(self._cgroup_path, group_name))

    def _move_init_procs(self):
        """Creates the pilot sub-group and moves all of the initial processes
        from the top group into the new sub-group.

        Will raise a RuntimeError if any cgroup configuration problem
        prevents this from completing successfully.
        """
        self._create_group(self.PILOT_GROUP, isolate_oom=False)
        cur_pids = self._read_control("", self.CTRL_PROCS)
        self._write_control(self.PILOT_GROUP, self.CTRL_PROCS, cur_pids)

    def _read_control(self, group_name, ctrl_name):
        """Reads a control value for the given group_name (relative to our base path).

        The returned value varies depending on the value content:
          - For a single token value, a string containing that token will be returned.
          - For a single line value with space-separated tokens, a list of tokens will be returned.
          - For a multi-line value (where each line is a token), a list of tokens will be returned.
          - For an empty file, an empty list will be returned.
        All tokens in the return values are strings.
        A RuntimeError will be raised if the control cannot be read.
        """
        ctrl_path = os.path.join(self._cgroup_path, group_name, ctrl_name)
        try:
            with open(ctrl_path, encoding="ascii") as file_in:
                values = [line.strip() for line in file_in]
        except PermissionError as err:
            raise RuntimeError(f"Access denied reading read control '{group_name}/{ctrl_name}'") from err
        except OSError as err:
            raise RuntimeError(f"Error reading control '{group_name}/{ctrl_name}'") from err
        if len(values) == 1:
            # The space check must be made on the line itself: testing the
            # list for membership of " " can never match a stripped line.
            if " " in values[0]:
                return values[0].split()
            return values[0]
        return values

    def _write_control(self, group_name, ctrl_name, value):
        """Writes a control value for a given group_name (relative to our base path).

        The value can be a string or an iterable of strings; each item is
        written on its own line.
        A RuntimeError will be raised if the control cannot be set.
        """
        try:
            ctrl_path = os.path.join(self._cgroup_path, group_name, ctrl_name)
            with open(ctrl_path, "w", encoding="ascii") as file_out:
                if isinstance(value, str):
                    value = [value]
                for arg in value:
                    file_out.write(f"{arg}\n")
                    # Flush is critical here as setting multiple values at the same time may fail
                    file_out.flush()
        except PermissionError as err:
            raise RuntimeError(f"Access denied writing control '{group_name}/{ctrl_name}'") from err
        except OSError as err:
            # This generally happens if we're trying to set a value that is
            # considered invalid, for example delegating a controller that isn't enabled
            # in the first place.
            raise RuntimeError(f"Error writing control '{group_name}/{ctrl_name}' = {value}") from err

    def _get_oom_count(self, slot_name):
        """Extracts the OOM counter for the given slot.

        Returns the counter as an int when it is non-zero. Returns None when
        memory.events has no "oom" line or when the counter is zero (falsy
        results are skipped by _filter_file).
        Errors opening or parsing the file are propagated to the caller.
        """

        def filt(line):
            """Filter to find the oom counter from a memory.events file."""
            if line.startswith("oom "):
                return int(line[4:])
            return False

        mem_events = os.path.join(self._cgroup_path, slot_name, self.CTRL_MEM_EVENTS)
        return self._filter_file(mem_events, filt)

    def _set_limits(self, group_name, cores=None, memory=None, noswap=False):
        """Sets the limits for an existing group.

        See _create_slot for a description of the other parameters.
        Limits whose value is falsy (None, 0, False) are left untouched.
        This will raise a RuntimeError if any of the limits fail to apply.
        """
        if cores:
            proc_max = int(cores * self.CPU_WEIGHT)
            self._write_control(group_name, self.CTRL_CPU_MAX, f"{proc_max} {self.CPU_PERIOD}")
        if memory:
            self._write_control(group_name, self.CTRL_MEM_MAX, f"{memory}")
        if noswap:
            self._write_control(group_name, self.CTRL_MEM_SWAP_MAX, "0")

    def _prepare(self):
        """Sets up the cgroup tree for the current process.

        Should be called once, before using any of the other functions in this class.

        Note that this function (specifically the _move_init_procs call) assumes that
        the list of processes is static. If the process list changes while this is running,
        it is likely that this will fail to set things up properly.

        Raises a RuntimeError if the cpu or memory controller is unavailable
        or the tree cannot be configured.
        """
        self._detect_path()
        controllers = self._read_control("", self.CTRL_CONTROLLERS)
        if not controllers:
            raise RuntimeError("No controllers enabled")
        if isinstance(controllers, str):
            # A single controller is returned as a bare string; wrap it so the
            # checks below are token comparisons ("cpu" must not match "cpuset").
            controllers = [controllers]
        for ctrl in ("cpu", "memory"):
            if ctrl not in controllers:
                raise RuntimeError(f"{ctrl} controller not enabled")
        self._move_init_procs()
        self._write_control("", self.CTRL_SUBTREE, ["+cpu", "+memory"])
        self._ready = True

    def _create_slot(self, slot_name, cores=None, memory=None, noswap=False):
        """Creates a slot for a job with the given slot_name.

        Cores is a float, number of CPU cores this group may use.
        Memory is a string or int, either a number of bytes to limit the group RSS,
        or a string limit with a unit suffix, e.g. "1G" as supported by the cgroup memory
        controller.
        If noswap is set to true, the swap memory limit will be set to 0; this is mostly
        useful for testing (where the system may swap memory instead of triggering an
        OOM, which may allow a process to use more than the memory limit).
        Does nothing if the manager is not ready.
        This will raise a RuntimeError if setting up the slot fails.
        """
        if not self._ready:
            return
        self._create_group(slot_name)
        self._set_limits(slot_name, cores, memory, noswap)

    def _remove_slot(self, slot_name):
        """Removes a slot with the given name.

        Does nothing if the manager is not ready.
        Can raise usual filesystem OSError if the slot doesn't exist.
        """
        if not self._ready:
            return
        self._remove_group(slot_name)

    def _setup_subproc(self, slot_name):
        """A subprocess preexec function for setting up cgroups.

        This will move the current process into the given cgroup slot.
        On failure, no error will be reported.
        """
        # Threading danger!
        # There are potential threading issues with preexec functions
        # They must not hold any locks that the parent process might already
        # be holding, including ones in standard library functions.
        # This function should be kept as minimal as possible.
        try:
            self._write_control(slot_name, self.CTRL_PROCS, f"{os.getpid()}")
        except Exception:
            # Deliberately best-effort: we can't even really log here as
            # we're in the set-up context of the new process
            pass

    @staticmethod
    def _parse_limits(ceParameters):
        """Extracts (cores, memory, noswap) from an optional ceParameters dict.

        Recognised keys are "CPULimit" (cores, converted to float),
        "MemoryLimitMB" (converted to a number of bytes) and "MemoryNoSwap"
        ("yes"/"true", case-insensitive). Missing or empty entries give
        (None, None, False). Raises ValueError/TypeError on malformed values.
        """
        cores = None
        memory = None
        noswap = False
        if ceParameters:
            if cpuLimit := ceParameters.get("CPULimit", None):
                cores = float(cpuLimit)
            if memoryMB := int(ceParameters.get("MemoryLimitMB", 0)):
                memory = memoryMB * 1024 * 1024
            if str(ceParameters.get("MemoryNoSwap", "no")).lower() in ("yes", "true"):
                noswap = True
        return cores, memory, noswap

    def setUp(self):
        """Creates the base cgroup tree if possible. Should be called once
        per process before using systemCall.

        Returns S_OK/S_ERROR.
        """
        try:
            self._prepare()
        except Exception as err:
            # The majority of CGroup failures will be RuntimeError
            # However we don't want any unexpected failure to crash the upstream module,
            # We just want to continue without cgroup support instead
            return S_ERROR(str(err))
        return S_OK()

    def systemCall(self, *args, **kwargs):
        """A proxy function for Subprocess.systemCall but will create a cgroup2 slot
        if the functionality is available. An optional ceParameters dictionary
        may be included, which will be searched for specific cgroup memory options.

        Any "preexec_fn" supplied by the caller is overwritten. If the slot
        cannot be created (including malformed ceParameters), a warning is
        logged and the command is run without cgroup limits.
        Returns the usual S_OK/S_ERROR from Subprocess.systemCall.
        """
        preexec_fn = None
        slot_name = f"subproc_{os.getpid()}_{self._subproc_num}"
        self._subproc_num += 1
        # ceParameters is ours only: it must not be forwarded to Subprocess
        ceParameters = kwargs.pop("ceParameters", None)
        if self._ready:
            self.log.info(f"Creating slot cgroup {slot_name}")
            try:
                # Parsing is inside the try so that a malformed limit degrades
                # to "no cgroup" rather than aborting the call
                cores, memory, noswap = self._parse_limits(ceParameters)
                self.log.info(f"CGroup Limits, CPU: {cores}, Mem: {memory}, NoSwap: {noswap}")
                self._create_slot(slot_name, cores=cores, memory=memory, noswap=noswap)
                preexec_fn = functools.partial(CG2Manager._setup_subproc, self, slot_name)
            except Exception as err:
                self.log.warn("Failed to create slot cgroup:", str(err))
        kwargs["preexec_fn"] = preexec_fn
        try:
            return Subprocess.systemCall(*args, **kwargs)
        finally:
            # Always attempt the clean-up, even if the call above raised,
            # so that slot cgroups are not left behind
            if self._ready:
                self.log.info(f"Removing slot cgroup {slot_name}")
                try:
                    oom_count = self._get_oom_count(slot_name)
                    if oom_count:
                        # Child process triggered an OOM
                        # We can't readily report this upstream (child process will probably
                        # fail with an error code), so just log it and continue
                        self.log.info(f"OOM detected from child process (slot {slot_name})")
                    self._remove_slot(slot_name)
                except Exception as err:
                    self.log.warn(f"Failed to delete slot {slot_name} cgroup:", str(err))
@@ -2,294 +2,8 @@
2
2
  Condor ClassAd library.
3
3
  """
4
4
 
5
+ # Import from DIRACCommon for backward compatibility
6
+ from DIRACCommon.Core.Utilities.ClassAd.ClassAdLight import ClassAd
5
7
 
6
class ClassAd:
    """Lightweight container for the attributes of one ``[...]`` JDL enclosure.

    Attributes are kept in ``self.contents``, a dictionary mapping attribute
    names to their JDL expressions. Expressions are stored as strings exactly
    as they appear in the JDL (quotes, braces and brackets included).
    """

    def __init__(self, jdl):
        """ClassAd constructor from a JDL string

        :param str jdl: JDL text enclosed in ``[`` and ``]``. An invalid or
            empty JDL leaves ``contents`` empty.
        """
        self.contents = {}
        result = self.__analyse_jdl(jdl)
        if result:
            self.contents = result

    def __analyse_jdl(self, jdl, index=0):
        """Analyse one [] jdl enclosure

        :param str jdl: JDL text to parse
        :param int index: unused, kept for interface compatibility
        :return: dictionary of attribute name -> expression string; empty if
            the JDL is not enclosed in brackets or is malformed
        """
        jdl = jdl.strip()
        result = {}

        # startswith/endswith are safe on an empty string, unlike jdl[0]
        if not (jdl.startswith("[") and jdl.endswith("]")):
            print("Invalid JDL: it should start with [ and end with ]")
            return result

        body = jdl[1:-1]
        index = 0
        while index < len(body):
            # Attribute name: everything up to the next "="
            equalInd = body.find("=", index)
            if equalInd == -1:
                break
            name = body[index:equalInd]
            index = equalInd + 1
            if index >= len(body):
                # "=" was the last character: there is no value to record
                break

            # Attribute value: either a nested [] enclosure or plain text up to ";"
            bracketInd = body.find("[", index)
            semicolonInd = body.find(";", index)
            if bracketInd != -1 and (semicolonInd == -1 or bracketInd < semicolonInd):
                value, newind = self.__find_subjdl(body, bracketInd)
                if not newind:
                    # Unbalanced brackets or unexpected text after the enclosure.
                    # Continuing from index 0 would parse the same text forever.
                    return {}
            elif semicolonInd == -1:
                value = body[index:]
                newind = len(body)
            elif index == semicolonInd:
                # Empty value right after "=": the JDL is considered invalid
                return {}
            else:
                value = body[index:semicolonInd]
                newind = semicolonInd + 1

            result[name.strip()] = value.strip().replace("\n", "")
            index = newind

        return result

    def __find_subjdl(self, body, index):
        """Find a full [] enclosure starting from index

        :param str body: text being parsed
        :param int index: position of the opening ``[``
        :return: tuple (enclosure, next index). The next index points after
            the enclosure, any whitespace following it and the terminating
            ``;`` if present. ``("", 0)`` is returned when ``body[index]`` is
            not ``[``, when the brackets are unbalanced, or when the enclosure
            is followed by anything other than whitespace, ``;`` or the end of
            the text.
        """
        if body[index] != "[":
            return ("", 0)

        depth = 0
        closeInd = -1
        for pos in range(index, len(body)):
            char = body[pos]
            if char == "[":
                depth += 1
            elif char == "]":
                depth -= 1
                if depth == 0:
                    closeInd = pos
                    break
        if closeInd == -1:
            return ("", 0)

        result = body[index : closeInd + 1]
        nextInd = closeInd + 1
        # Tolerate blanks/newlines between the closing bracket and the ";"
        while nextInd < len(body) and body[nextInd].isspace():
            nextInd += 1
        if nextInd == len(body):
            return (result, nextInd)
        if body[nextInd] == ";":
            return (result, nextInd + 1)
        return ("", 0)

    def insertAttributeInt(self, name, attribute):
        """Insert a named integer attribute"""
        self.contents[name] = str(attribute)

    def insertAttributeBool(self, name, attribute):
        """Insert a named boolean attribute, stored as "true" or "false" """
        self.contents[name] = "true" if attribute else "false"

    def insertAttributeString(self, name, attribute):
        """Insert a named string attribute, stored surrounded by double quotes"""
        self.contents[name] = '"' + str(attribute) + '"'

    def insertAttributeVectorString(self, name, attributelist):
        """Insert a named string list attribute

        Each item is converted with ``str`` and quoted, as done for single
        strings by ``insertAttributeString``.
        """
        tmpstr = ",".join('"' + str(x) + '"' for x in attributelist)
        self.contents[name] = "{" + tmpstr + "}"

    def insertAttributeVectorInt(self, name, attributelist):
        """Insert a named integer list attribute"""
        tmpstr = ",".join(str(x) for x in attributelist)
        self.contents[name] = "{" + tmpstr + "}"

    def insertAttributeVectorStringList(self, name, attributelist):
        """Insert a named list of string lists

        The inner strings are joined as they are, without adding quotes.
        """
        listOfLists = ["{" + ",".join(stringList) + "}" for stringList in attributelist]
        self.contents[name] = "{" + ",".join(listOfLists) + "}"

    def lookupAttribute(self, name):
        """Check the presence of the given attribute"""
        return name in self.contents

    def set_expression(self, name, attribute):
        """Insert a named expression attribute, stored as ``str(attribute)``"""
        self.contents[name] = str(attribute)

    def get_expression(self, name):
        """Get expression corresponding to a named attribute

        :return: the stored expression (integers are converted to strings),
            or an empty string if the attribute is not defined
        """
        if name in self.contents:
            if isinstance(self.contents[name], int):
                return str(self.contents[name])
            return self.contents[name]
        return ""

    def isAttributeList(self, name):
        """Check if the given attribute is of the List type"""
        attribute = self.get_expression(name).strip()
        return attribute.startswith("{")

    def getListFromExpression(self, name):
        """Get a list of strings from a given expression

        Blanks and newlines are removed from the expression before it is
        split. Quotes are stripped from the items; nested ``{...}`` lists are
        returned as single items with their braces.

        :return: list of strings, empty if the expression is empty
        """
        tempString = self.get_expression(name).strip()
        listMode = False
        if tempString.startswith("{"):
            tempString = tempString[1:-1]
            listMode = True

        tempString = tempString.replace(" ", "").replace("\n", "")
        if tempString.find("{") < 0:
            if not listMode:
                # Scalar expression: a plain comma separated value
                tempString = tempString.replace('"', "")
                if not tempString:
                    return []
                return tempString.split(",")

        resultList = []
        while tempString:
            if tempString.startswith("{"):
                end = tempString.find("}")
                if end < 0:
                    # Unbalanced brace: keep the remainder as the last item.
                    # Slicing with end == -1 would never consume any text.
                    resultList.append(tempString)
                    break
                resultList.append(tempString[: end + 1])
                tempString = tempString[end + 1 :]
                if tempString.startswith(","):
                    tempString = tempString[1:]
            elif tempString.startswith('"'):
                end = tempString[1:].find('"')
                resultList.append(tempString[1 : end + 1])
                tempString = tempString[end + 2 :]
                if tempString.startswith(","):
                    tempString = tempString[1:]
            else:
                end = tempString.find(",")
                if end < 0:
                    resultList.append(tempString.replace('"', "").replace(" ", ""))
                    break
                resultList.append(tempString[:end].replace('"', "").replace(" ", ""))
                tempString = tempString[end + 1 :]

        return resultList

    def getDictionaryFromSubJDL(self, name):
        """Get a dictionary of the JDL attributes from a subsection

        :return: dictionary of name -> value with the double quotes removed
            from the values; empty if any ``;`` separated item is not of the
            form ``name = value``
        """
        tempList = self.get_expression(name)[1:-1]
        resDict = {}
        for item in tempList.split(";"):
            if not item.strip():
                # Blank item left by a trailing ";" (the form asJDL produces)
                continue
            parts = item.split("=")
            if len(parts) != 2:
                return {}
            resDict[parts[0].strip()] = parts[1].strip().replace('"', "")

        return resDict

    def deleteAttribute(self, name):
        """Delete a named attribute

        :return: 1 if the attribute was present and deleted, 0 otherwise
        """
        if name in self.contents:
            del self.contents[name]
            return 1
        return 0

    def isOK(self):
        """Check the JDL validity - to be defined

        :return: 1 if at least one attribute is defined, 0 otherwise
        """
        if self.contents:
            return 1
        return 0

    def asJDL(self):
        """Convert the JDL description into a string

        Attributes are written in sorted name order; list values and nested
        ``[...]`` enclosures are spread over several indented lines.
        """
        result = []
        for name, value in sorted(self.contents.items()):
            # Values are normally strings; str() keeps the slicing below safe
            # for anything else that was put directly into contents
            value = str(value)
            if value[0:1] == "{":
                result += [4 * " " + name + " = \n"]
                result += [8 * " " + "{\n"]
                for st in value[1:-1].split(","):
                    result += [12 * " " + st.strip() + ",\n"]
                # Drop the ",\n" that follows the last list item
                result[-1] = result[-1][:-2]
                result += ["\n" + 8 * " " + "};\n"]
            elif value[0:1] == "[":
                tempjdl = ClassAd(value).asJDL() + ";"
                result += [4 * " " + name + " = \n"]
                for line in tempjdl.split("\n"):
                    result += [8 * " " + line + "\n"]
            else:
                result += [4 * " " + name + " = " + value + ";\n"]
        if result:
            # Drop the final newline, the closing bracket brings its own
            result[-1] = result[-1][:-1]
        return "[ \n" + "".join(result) + "\n]"

    def getAttributeString(self, name):
        """Get String type attribute value

        :return: the expression without double quotes, or "" if not defined
        """
        value = ""
        if self.lookupAttribute(name):
            value = self.get_expression(name).replace('"', "")
        return value

    def getAttributeInt(self, name):
        """Get Integer type attribute value

        :return: the integer value, or None if not defined or not an integer
        """
        value = None
        if self.lookupAttribute(name):
            try:
                value = int(self.get_expression(name).replace('"', ""))
            except (ValueError, TypeError, AttributeError):
                value = None
        return value

    def getAttributeBool(self, name):
        """Get Boolean type attribute value

        :return: True only if the attribute is defined and equals "true"
            (case-insensitive, quotes ignored)
        """
        if not self.lookupAttribute(name):
            return False

        value = self.get_expression(name).replace('"', "")
        return value.lower() == "true"

    def getAttributeFloat(self, name):
        """Get Float type attribute value

        :return: the float value, or None if not defined or not a number
        """
        value = None
        if self.lookupAttribute(name):
            try:
                value = float(self.get_expression(name).replace('"', ""))
            except (ValueError, TypeError, AttributeError):
                value = None
        return value

    def getAttributes(self) -> list[str]:
        """Get the list of all the attribute names

        :return: list of names as strings
        """
        return list(self.contents)
8
+ # Re-export for backward compatibility
9
+ __all__ = ["ClassAd"]