DIRAC 9.0.0a54__py3-none-any.whl → 9.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DIRAC/AccountingSystem/Client/AccountingCLI.py +0 -140
- DIRAC/AccountingSystem/Client/DataStoreClient.py +0 -13
- DIRAC/AccountingSystem/Client/Types/BaseAccountingType.py +0 -7
- DIRAC/AccountingSystem/ConfigTemplate.cfg +0 -5
- DIRAC/AccountingSystem/Service/DataStoreHandler.py +0 -72
- DIRAC/ConfigurationSystem/Client/Helpers/CSGlobals.py +0 -9
- DIRAC/ConfigurationSystem/Client/Helpers/Registry.py +34 -32
- DIRAC/ConfigurationSystem/Client/Helpers/Resources.py +11 -43
- DIRAC/ConfigurationSystem/Client/Helpers/test/Test_Helpers.py +0 -16
- DIRAC/ConfigurationSystem/Client/LocalConfiguration.py +14 -8
- DIRAC/ConfigurationSystem/Client/PathFinder.py +47 -8
- DIRAC/ConfigurationSystem/Client/SyncPlugins/CERNLDAPSyncPlugin.py +4 -1
- DIRAC/ConfigurationSystem/Client/VOMS2CSSynchronizer.py +9 -2
- DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py +41 -1
- DIRAC/ConfigurationSystem/private/RefresherBase.py +4 -2
- DIRAC/Core/DISET/ServiceReactor.py +11 -3
- DIRAC/Core/DISET/private/BaseClient.py +1 -2
- DIRAC/Core/DISET/private/Transports/M2SSLTransport.py +9 -7
- DIRAC/Core/Security/DiracX.py +12 -7
- DIRAC/Core/Security/IAMService.py +4 -3
- DIRAC/Core/Security/ProxyInfo.py +9 -5
- DIRAC/Core/Security/test/test_diracx_token_from_pem.py +161 -0
- DIRAC/Core/Tornado/Client/ClientSelector.py +4 -1
- DIRAC/Core/Tornado/Server/TornadoService.py +1 -1
- DIRAC/Core/Utilities/ClassAd/ClassAdLight.py +4 -290
- DIRAC/Core/Utilities/DErrno.py +5 -309
- DIRAC/Core/Utilities/Extensions.py +10 -1
- DIRAC/Core/Utilities/Graphs/GraphData.py +1 -1
- DIRAC/Core/Utilities/JDL.py +1 -195
- DIRAC/Core/Utilities/List.py +1 -124
- DIRAC/Core/Utilities/MySQL.py +101 -97
- DIRAC/Core/Utilities/Os.py +32 -1
- DIRAC/Core/Utilities/Platform.py +2 -107
- DIRAC/Core/Utilities/ReturnValues.py +7 -252
- DIRAC/Core/Utilities/StateMachine.py +12 -178
- DIRAC/Core/Utilities/TimeUtilities.py +10 -253
- DIRAC/Core/Utilities/test/Test_JDL.py +0 -3
- DIRAC/Core/Utilities/test/Test_Profiler.py +20 -20
- DIRAC/Core/scripts/dirac_agent.py +1 -1
- DIRAC/Core/scripts/dirac_apptainer_exec.py +16 -7
- DIRAC/Core/scripts/dirac_platform.py +1 -92
- DIRAC/DataManagementSystem/Agent/FTS3Agent.py +8 -7
- DIRAC/DataManagementSystem/Agent/RequestOperations/RemoveFile.py +7 -6
- DIRAC/DataManagementSystem/Client/FTS3Job.py +71 -34
- DIRAC/DataManagementSystem/DB/FTS3DB.py +3 -0
- DIRAC/DataManagementSystem/DB/FileCatalogComponents/DatasetManager/DatasetManager.py +1 -1
- DIRAC/DataManagementSystem/Utilities/DMSHelpers.py +6 -2
- DIRAC/DataManagementSystem/scripts/dirac_dms_create_moving_request.py +2 -0
- DIRAC/DataManagementSystem/scripts/dirac_dms_protocol_matrix.py +0 -1
- DIRAC/FrameworkSystem/Client/ComponentInstaller.py +4 -2
- DIRAC/FrameworkSystem/DB/ProxyDB.py +9 -5
- DIRAC/FrameworkSystem/Utilities/TokenManagementUtilities.py +3 -2
- DIRAC/FrameworkSystem/Utilities/diracx.py +2 -74
- DIRAC/FrameworkSystem/private/authorization/AuthServer.py +2 -2
- DIRAC/FrameworkSystem/scripts/dirac_login.py +2 -2
- DIRAC/FrameworkSystem/scripts/dirac_proxy_init.py +1 -1
- DIRAC/Interfaces/API/Dirac.py +27 -13
- DIRAC/Interfaces/API/DiracAdmin.py +42 -7
- DIRAC/Interfaces/API/Job.py +1 -0
- DIRAC/Interfaces/scripts/dirac_admin_allow_site.py +7 -1
- DIRAC/Interfaces/scripts/dirac_admin_ban_site.py +7 -1
- DIRAC/Interfaces/scripts/dirac_wms_job_parameters.py +0 -1
- DIRAC/MonitoringSystem/Client/Types/WMSHistory.py +4 -0
- DIRAC/MonitoringSystem/Client/WebAppClient.py +26 -0
- DIRAC/MonitoringSystem/ConfigTemplate.cfg +9 -0
- DIRAC/MonitoringSystem/DB/MonitoringDB.py +6 -25
- DIRAC/MonitoringSystem/Service/MonitoringHandler.py +0 -33
- DIRAC/MonitoringSystem/Service/WebAppHandler.py +599 -0
- DIRAC/MonitoringSystem/private/MainReporter.py +0 -3
- DIRAC/ProductionSystem/scripts/dirac_prod_get_trans.py +2 -3
- DIRAC/RequestManagementSystem/Agent/RequestExecutingAgent.py +8 -6
- DIRAC/RequestManagementSystem/ConfigTemplate.cfg +6 -6
- DIRAC/RequestManagementSystem/DB/test/RMSTestScenari.py +2 -0
- DIRAC/ResourceStatusSystem/Client/SiteStatus.py +4 -2
- DIRAC/ResourceStatusSystem/Command/FreeDiskSpaceCommand.py +3 -1
- DIRAC/ResourceStatusSystem/Utilities/CSHelpers.py +2 -31
- DIRAC/ResourceStatusSystem/scripts/dirac_rss_set_status.py +18 -4
- DIRAC/Resources/Catalog/RucioFileCatalogClient.py +1 -1
- DIRAC/Resources/Computing/AREXComputingElement.py +19 -3
- DIRAC/Resources/Computing/BatchSystems/Condor.py +126 -108
- DIRAC/Resources/Computing/BatchSystems/SLURM.py +5 -1
- DIRAC/Resources/Computing/BatchSystems/test/Test_SLURM.py +46 -0
- DIRAC/Resources/Computing/HTCondorCEComputingElement.py +37 -43
- DIRAC/Resources/Computing/SingularityComputingElement.py +6 -1
- DIRAC/Resources/Computing/test/Test_HTCondorCEComputingElement.py +67 -49
- DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +2 -1
- DIRAC/Resources/IdProvider/CheckInIdProvider.py +13 -0
- DIRAC/Resources/IdProvider/IdProviderFactory.py +11 -3
- DIRAC/Resources/Storage/StorageBase.py +4 -2
- DIRAC/Resources/Storage/StorageElement.py +4 -4
- DIRAC/TransformationSystem/Agent/TaskManagerAgentBase.py +10 -16
- DIRAC/TransformationSystem/Agent/TransformationAgent.py +22 -1
- DIRAC/TransformationSystem/Agent/TransformationCleaningAgent.py +15 -15
- DIRAC/TransformationSystem/Client/Transformation.py +2 -1
- DIRAC/TransformationSystem/Client/TransformationClient.py +0 -7
- DIRAC/TransformationSystem/Client/Utilities.py +9 -0
- DIRAC/TransformationSystem/Service/TransformationManagerHandler.py +0 -336
- DIRAC/TransformationSystem/Utilities/ReplicationCLIParameters.py +3 -3
- DIRAC/TransformationSystem/scripts/dirac_production_runjoblocal.py +2 -4
- DIRAC/TransformationSystem/test/Test_replicationTransformation.py +5 -6
- DIRAC/Workflow/Modules/test/Test_Modules.py +5 -0
- DIRAC/WorkloadManagementSystem/Agent/JobAgent.py +1 -5
- DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py +11 -7
- DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py +4 -3
- DIRAC/WorkloadManagementSystem/Agent/PushJobAgent.py +13 -13
- DIRAC/WorkloadManagementSystem/Agent/SiteDirector.py +10 -13
- DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py +18 -51
- DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py +41 -1
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py +2 -0
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobCleaningAgent.py +7 -9
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_PushJobAgent.py +1 -0
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_SiteDirector.py +8 -2
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py +4 -5
- DIRAC/WorkloadManagementSystem/Client/DownloadInputData.py +7 -5
- DIRAC/WorkloadManagementSystem/Client/JobMonitoringClient.py +10 -11
- DIRAC/WorkloadManagementSystem/Client/JobState/JobManifest.py +32 -261
- DIRAC/WorkloadManagementSystem/Client/JobStateUpdateClient.py +3 -0
- DIRAC/WorkloadManagementSystem/Client/JobStatus.py +8 -152
- DIRAC/WorkloadManagementSystem/Client/SandboxStoreClient.py +25 -38
- DIRAC/WorkloadManagementSystem/Client/WMSClient.py +2 -3
- DIRAC/WorkloadManagementSystem/Client/test/Test_Client_DownloadInputData.py +29 -0
- DIRAC/WorkloadManagementSystem/ConfigTemplate.cfg +4 -8
- DIRAC/WorkloadManagementSystem/DB/JobDB.py +40 -69
- DIRAC/WorkloadManagementSystem/DB/JobDBUtils.py +18 -147
- DIRAC/WorkloadManagementSystem/DB/JobParametersDB.py +9 -9
- DIRAC/WorkloadManagementSystem/DB/PilotAgentsDB.py +3 -2
- DIRAC/WorkloadManagementSystem/DB/SandboxMetadataDB.py +28 -39
- DIRAC/WorkloadManagementSystem/DB/StatusUtils.py +125 -0
- DIRAC/WorkloadManagementSystem/DB/tests/Test_JobDB.py +1 -1
- DIRAC/WorkloadManagementSystem/DB/tests/Test_StatusUtils.py +28 -0
- DIRAC/WorkloadManagementSystem/Executor/JobSanity.py +3 -3
- DIRAC/WorkloadManagementSystem/FutureClient/JobStateUpdateClient.py +2 -14
- DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +14 -9
- DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +36 -10
- DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapperTemplate.py +4 -0
- DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py +33 -154
- DIRAC/WorkloadManagementSystem/Service/JobMonitoringHandler.py +5 -323
- DIRAC/WorkloadManagementSystem/Service/JobStateUpdateHandler.py +0 -16
- DIRAC/WorkloadManagementSystem/Service/PilotManagerHandler.py +6 -102
- DIRAC/WorkloadManagementSystem/Service/SandboxStoreHandler.py +5 -51
- DIRAC/WorkloadManagementSystem/Service/WMSAdministratorHandler.py +16 -79
- DIRAC/WorkloadManagementSystem/Utilities/JobModel.py +28 -199
- DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +65 -3
- DIRAC/WorkloadManagementSystem/Utilities/JobStatusUtility.py +2 -64
- DIRAC/WorkloadManagementSystem/Utilities/ParametricJob.py +7 -171
- DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py +73 -7
- DIRAC/WorkloadManagementSystem/Utilities/PilotWrapper.py +2 -0
- DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +16 -0
- DIRAC/WorkloadManagementSystem/Utilities/Utils.py +36 -1
- DIRAC/WorkloadManagementSystem/Utilities/jobAdministration.py +15 -0
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobModel.py +1 -5
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_ParametricJob.py +45 -128
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_PilotWrapper.py +16 -0
- DIRAC/__init__.py +55 -54
- {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/METADATA +6 -4
- {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/RECORD +160 -160
- {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/WHEEL +1 -1
- {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/entry_points.txt +0 -3
- DIRAC/Core/Utilities/test/Test_List.py +0 -150
- DIRAC/Core/Utilities/test/Test_Time.py +0 -88
- DIRAC/TransformationSystem/scripts/dirac_transformation_archive.py +0 -30
- DIRAC/TransformationSystem/scripts/dirac_transformation_clean.py +0 -30
- DIRAC/TransformationSystem/scripts/dirac_transformation_remove_output.py +0 -30
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_JobManager.py +0 -58
- {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/licenses/LICENSE +0 -0
- {dirac-9.0.0a54.dist-info → dirac-9.0.7.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
""" The Job Sanity executor assigns sandboxes to the job """
|
|
2
2
|
from DIRAC import S_OK
|
|
3
|
-
from DIRAC.WorkloadManagementSystem.
|
|
3
|
+
from DIRAC.WorkloadManagementSystem.DB.SandboxMetadataDB import SandboxMetadataDB
|
|
4
4
|
from DIRAC.WorkloadManagementSystem.Executor.Base.OptimizerExecutor import OptimizerExecutor
|
|
5
5
|
|
|
6
6
|
|
|
@@ -15,7 +15,6 @@ class JobSanity(OptimizerExecutor):
|
|
|
15
15
|
@classmethod
|
|
16
16
|
def initializeOptimizer(cls):
|
|
17
17
|
"""Initialize specific parameters for JobSanityAgent."""
|
|
18
|
-
cls.sandboxClient = SandboxStoreClient(useCertificates=True, smdb=True)
|
|
19
18
|
return S_OK()
|
|
20
19
|
|
|
21
20
|
def optimizeJob(self, jid, jobState):
|
|
@@ -57,7 +56,8 @@ class JobSanity(OptimizerExecutor):
|
|
|
57
56
|
if not numSBsToAssign:
|
|
58
57
|
return S_OK(0)
|
|
59
58
|
self.jobLog.info("Assigning sandboxes", f"({numSBsToAssign} on behalf of {ownerName}@{ownerGroup}@{vo})")
|
|
60
|
-
|
|
59
|
+
eId = f"Job:{jobState.jid}"
|
|
60
|
+
result = SandboxMetadataDB().assignSandboxesToEntities({eId: sbsToAssign}, ownerName, ownerGroup)
|
|
61
61
|
if not result["OK"]:
|
|
62
62
|
self.jobLog.error("Could not assign sandboxes in the SandboxStore")
|
|
63
63
|
return result
|
|
@@ -77,17 +77,11 @@ class JobStateUpdateClient(FutureClient):
|
|
|
77
77
|
def setJobAttribute(self, jobID: str | int, attribute: str, value: str):
|
|
78
78
|
with DiracXClient() as api:
|
|
79
79
|
if attribute == "Status":
|
|
80
|
-
api.jobs.set_job_statuses(
|
|
80
|
+
return api.jobs.set_job_statuses(
|
|
81
81
|
{jobID: {datetime.now(tz=timezone.utc): {"Status": value}}},
|
|
82
82
|
)
|
|
83
83
|
else:
|
|
84
|
-
api.jobs.patch_metadata({jobID: {attribute: value}})
|
|
85
|
-
|
|
86
|
-
@stripValueIfOK
|
|
87
|
-
@convertToReturnValue
|
|
88
|
-
def setJobFlag(self, jobID: str | int, flag: str):
|
|
89
|
-
with DiracXClient() as api:
|
|
90
|
-
api.jobs.patch_metadata({jobID: {flag: True}})
|
|
84
|
+
return api.jobs.patch_metadata({jobID: {attribute: value}})
|
|
91
85
|
|
|
92
86
|
@stripValueIfOK
|
|
93
87
|
@convertToReturnValue
|
|
@@ -151,12 +145,6 @@ class JobStateUpdateClient(FutureClient):
|
|
|
151
145
|
updates = {job_id: {k: v} for job_id, (k, v) in jobsParameterDict.items()}
|
|
152
146
|
api.jobs.patch_metadata(updates)
|
|
153
147
|
|
|
154
|
-
@stripValueIfOK
|
|
155
|
-
@convertToReturnValue
|
|
156
|
-
def unsetJobFlag(self, jobID: str | int, flag: str):
|
|
157
|
-
with DiracXClient() as api:
|
|
158
|
-
api.jobs.patch_metadata({jobID: {flag: False}})
|
|
159
|
-
|
|
160
148
|
@stripValueIfOK
|
|
161
149
|
@convertToReturnValue
|
|
162
150
|
def updateJobFromStager(self, jobID: str | int, status: str):
|
|
@@ -10,6 +10,7 @@ and a Watchdog Agent that can monitor its progress.
|
|
|
10
10
|
:caption: JobWrapper options
|
|
11
11
|
|
|
12
12
|
"""
|
|
13
|
+
|
|
13
14
|
import contextlib
|
|
14
15
|
import datetime
|
|
15
16
|
import glob
|
|
@@ -54,6 +55,8 @@ from DIRAC.WorkloadManagementSystem.Client.JobStateUpdateClient import JobStateU
|
|
|
54
55
|
from DIRAC.WorkloadManagementSystem.Client.SandboxStoreClient import SandboxStoreClient
|
|
55
56
|
from DIRAC.WorkloadManagementSystem.JobWrapper.Watchdog import Watchdog
|
|
56
57
|
|
|
58
|
+
CHILD_PID_POLL_INTERVALS = list(range(5, 40, 5))
|
|
59
|
+
|
|
57
60
|
|
|
58
61
|
class JobWrapper:
|
|
59
62
|
"""The only user of the JobWrapper is the JobWrapperTemplate"""
|
|
@@ -119,14 +122,16 @@ class JobWrapper:
|
|
|
119
122
|
self.pilotRef = gConfig.getValue("/LocalSite/PilotReference", "Unknown")
|
|
120
123
|
self.cpuNormalizationFactor = gConfig.getValue("/LocalSite/CPUNormalizationFactor", 0.0)
|
|
121
124
|
self.bufferLimit = gConfig.getValue(self.section + "/BufferLimit", 10485760)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
+
try:
|
|
126
|
+
self.defaultOutputSE = getDestinationSEList("SE-USER", self.siteName)
|
|
127
|
+
except RuntimeError:
|
|
128
|
+
self.defaultOutputSE = []
|
|
125
129
|
self.defaultCatalog = gConfig.getValue(self.section + "/DefaultCatalog", [])
|
|
126
130
|
self.masterCatalogOnlyFlag = gConfig.getValue(self.section + "/MasterCatalogOnlyFlag", True)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
131
|
+
try:
|
|
132
|
+
self.defaultFailoverSE = getDestinationSEList("Tier1-Failover", self.siteName)
|
|
133
|
+
except RuntimeError:
|
|
134
|
+
self.defaultFailoverSE = []
|
|
130
135
|
self.defaultOutputPath = ""
|
|
131
136
|
self.retryUpload = gConfig.getValue(self.section + "/RetryUpload", False)
|
|
132
137
|
self.dm = DataManager()
|
|
@@ -427,14 +432,14 @@ class JobWrapper:
|
|
|
427
432
|
)
|
|
428
433
|
exeThread.start()
|
|
429
434
|
payloadPID = None
|
|
430
|
-
for seconds in
|
|
435
|
+
for seconds in CHILD_PID_POLL_INTERVALS:
|
|
431
436
|
time.sleep(seconds)
|
|
432
437
|
payloadPID = spObject.getChildPID()
|
|
433
438
|
if payloadPID:
|
|
434
439
|
self.__setJobParam("PayloadPID", payloadPID)
|
|
435
440
|
break
|
|
436
441
|
if not payloadPID:
|
|
437
|
-
return S_ERROR("Payload process could not start after
|
|
442
|
+
return S_ERROR(f"Payload process could not start after {sum(CHILD_PID_POLL_INTERVALS)} seconds")
|
|
438
443
|
|
|
439
444
|
watchdog = Watchdog(
|
|
440
445
|
pid=self.currentPID,
|
|
@@ -1210,8 +1215,8 @@ class JobWrapper:
|
|
|
1210
1215
|
lfn = str(basePath / outputPath / os.path.basename(localfile))
|
|
1211
1216
|
else:
|
|
1212
1217
|
# if LFN is given, take it as it is
|
|
1213
|
-
localfile = str(self.jobIDPath / outputFile.replace("LFN:", ""))
|
|
1214
1218
|
lfn = outputFile.replace("LFN:", "")
|
|
1219
|
+
localfile = str(self.jobIDPath / os.path.basename(lfn))
|
|
1215
1220
|
|
|
1216
1221
|
return (lfn, localfile)
|
|
1217
1222
|
|
|
@@ -344,24 +344,40 @@ def test_processQuickExecutionNoWatchdog(mocker):
|
|
|
344
344
|
|
|
345
345
|
|
|
346
346
|
@pytest.mark.slow
|
|
347
|
-
|
|
348
|
-
|
|
347
|
+
@pytest.mark.parametrize("expect_failure", [True, False])
|
|
348
|
+
def test_processSubprocessFailureNoPid(mocker, monkeypatch, expect_failure):
|
|
349
|
+
"""Test the process method of the JobWrapper class: the subprocess fails and no PID is returned.
|
|
350
|
+
|
|
351
|
+
expect_failure is used to ensure that the JobWrapper is functioning correctly even with the other patching
|
|
352
|
+
that is applied in the test (e.g. CHILD_PID_POLL_INTERVALS).
|
|
353
|
+
"""
|
|
349
354
|
# Test failure in starting the payload process
|
|
350
355
|
jw = JobWrapper()
|
|
351
356
|
jw.jobArgs = {}
|
|
352
357
|
|
|
353
358
|
mocker.patch.object(jw, "_JobWrapper__report")
|
|
354
359
|
mocker.patch.object(jw, "_JobWrapper__setJobParam")
|
|
360
|
+
monkeypatch.setattr(
|
|
361
|
+
"DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapper.CHILD_PID_POLL_INTERVALS", [0.1, 0.2, 0.3, 0.4, 0.5]
|
|
362
|
+
)
|
|
363
|
+
|
|
355
364
|
mock_exeThread = mocker.Mock()
|
|
356
365
|
mock_exeThread.start.side_effect = lambda: time.sleep(0.1)
|
|
357
|
-
|
|
366
|
+
if expect_failure:
|
|
367
|
+
mocker.patch(
|
|
368
|
+
"DIRAC.WorkloadManagementSystem.JobWrapper.JobWrapper.ExecutionThread", return_value=mock_exeThread
|
|
369
|
+
)
|
|
358
370
|
|
|
359
371
|
with tempfile.NamedTemporaryFile(delete=True) as std_out, tempfile.NamedTemporaryFile(delete=True) as std_err:
|
|
360
372
|
jw.outputFile = std_out.name
|
|
361
373
|
jw.errorFile = std_err.name
|
|
362
374
|
result = jw.process(command="mock_command", env={})
|
|
363
|
-
|
|
364
|
-
|
|
375
|
+
|
|
376
|
+
if expect_failure:
|
|
377
|
+
assert not result["OK"]
|
|
378
|
+
assert "Payload process could not start after 1.5 seconds" in result["Message"]
|
|
379
|
+
else:
|
|
380
|
+
assert result["OK"]
|
|
365
381
|
|
|
366
382
|
|
|
367
383
|
# -------------------------------------------------------------------------------------------------
|
|
@@ -648,6 +664,7 @@ def jobIDPath():
|
|
|
648
664
|
# Output data files
|
|
649
665
|
(p / "00232454_00000244_1.sim").touch()
|
|
650
666
|
(p / "1720442808testFileUpload.txt").touch()
|
|
667
|
+
(p / "testFileUploadFullLFN.txt").touch()
|
|
651
668
|
|
|
652
669
|
with open(p / "pool_xml_catalog.xml", "w") as f:
|
|
653
670
|
f.write(
|
|
@@ -847,7 +864,11 @@ def test_processJobOutputs_output_data_upload(mocker, setup_another_job_wrapper)
|
|
|
847
864
|
# BTW, isn't the concept of pool_xml_catalog.xml from lhcbdirac?
|
|
848
865
|
jw.jobArgs = {
|
|
849
866
|
"OutputSandbox": [],
|
|
850
|
-
"OutputData": [
|
|
867
|
+
"OutputData": [
|
|
868
|
+
"1720442808testFileUpload.txt",
|
|
869
|
+
"LFN:00232454_00000244_1.sim",
|
|
870
|
+
"LFN:/dirac/user/u/unknown/testFileUploadFullLFN.txt",
|
|
871
|
+
],
|
|
851
872
|
"Owner": "Jane Doe",
|
|
852
873
|
}
|
|
853
874
|
|
|
@@ -863,10 +884,15 @@ def test_processJobOutputs_output_data_upload(mocker, setup_another_job_wrapper)
|
|
|
863
884
|
assert jw.jobReport.jobStatusInfo[1][:-1] == ("", JobMinorStatus.UPLOADING_OUTPUT_DATA)
|
|
864
885
|
assert jw.jobReport.jobStatusInfo[2][:-1] == (JobStatus.COMPLETING, JobMinorStatus.OUTPUT_DATA_UPLOADED)
|
|
865
886
|
assert len(jw.jobReport.jobParameters) == 1
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
"00232454_00000244_1.sim
|
|
869
|
-
|
|
887
|
+
|
|
888
|
+
expected_files = {
|
|
889
|
+
"00232454_00000244_1.sim",
|
|
890
|
+
"/dirac/user/u/unknown/0/123/1720442808testFileUpload.txt",
|
|
891
|
+
"/dirac/user/u/unknown/testFileUploadFullLFN.txt",
|
|
892
|
+
}
|
|
893
|
+
assert jw.jobReport.jobParameters[0][0] == "UploadedOutputData"
|
|
894
|
+
uploaded_files = set(jw.jobReport.jobParameters[0][1].split(", "))
|
|
895
|
+
assert uploaded_files == expected_files
|
|
870
896
|
|
|
871
897
|
|
|
872
898
|
# -------------------------------------------------------------------------------------------------
|
|
@@ -72,6 +72,7 @@ def extraOptions():
|
|
|
72
72
|
os.remove(extraOptions)
|
|
73
73
|
|
|
74
74
|
|
|
75
|
+
@pytest.mark.slow
|
|
75
76
|
def test_createAndExecuteJobWrapperTemplate_success(extraOptions):
|
|
76
77
|
"""Test the creation of a classical job wrapper and its execution:
|
|
77
78
|
There is an extra option cfg file to be passed to the job wrapper.
|
|
@@ -144,6 +145,7 @@ def test_createAndExecuteJobWrapperTemplate_success(extraOptions):
|
|
|
144
145
|
shutil.rmtree(os.path.join(os.getcwd(), "job"))
|
|
145
146
|
|
|
146
147
|
|
|
148
|
+
@pytest.mark.slow
|
|
147
149
|
def test_createAndExecuteJobWrapperTemplate_missingExtraOptions():
|
|
148
150
|
"""Test the creation of a classical job wrapper and its execution:
|
|
149
151
|
There is no extra options to be passed to the job wrapper.
|
|
@@ -205,6 +207,7 @@ def test_createAndExecuteJobWrapperTemplate_missingExtraOptions():
|
|
|
205
207
|
shutil.rmtree(os.path.join(os.getcwd(), "job"))
|
|
206
208
|
|
|
207
209
|
|
|
210
|
+
@pytest.mark.slow
|
|
208
211
|
def test_createAndExecuteRelocatedJobWrapperTemplate_success(extraOptions):
|
|
209
212
|
"""Test the creation of a relocated job wrapper and its execution:
|
|
210
213
|
This is generally used when containers are involved (SingularityCE).
|
|
@@ -325,6 +328,7 @@ def test_createAndExecuteRelocatedJobWrapperTemplate_success(extraOptions):
|
|
|
325
328
|
shutil.rmtree(wrapperPath)
|
|
326
329
|
|
|
327
330
|
|
|
331
|
+
@pytest.mark.slow
|
|
328
332
|
def test_createAndExecuteJobWrapperOfflineTemplate_success(extraOptions):
|
|
329
333
|
"""Test the creation of an offline job wrapper and its execution:
|
|
330
334
|
This is generally used when pre/post processing operations are executed locally,
|
|
@@ -21,9 +21,7 @@ from DIRAC.Core.Utilities.JDL import jdlToBaseJobDescriptionModel
|
|
|
21
21
|
from DIRAC.Core.Utilities.JEncode import strToIntDict
|
|
22
22
|
from DIRAC.Core.Utilities.ObjectLoader import ObjectLoader
|
|
23
23
|
from DIRAC.FrameworkSystem.Client.ProxyManagerClient import gProxyManager
|
|
24
|
-
from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient
|
|
25
24
|
from DIRAC.WorkloadManagementSystem.Client import JobStatus
|
|
26
|
-
from DIRAC.WorkloadManagementSystem.Client.JobStatus import filterJobStateTransition
|
|
27
25
|
from DIRAC.WorkloadManagementSystem.Service.JobPolicy import (
|
|
28
26
|
RIGHT_DELETE,
|
|
29
27
|
RIGHT_KILL,
|
|
@@ -32,8 +30,10 @@ from DIRAC.WorkloadManagementSystem.Service.JobPolicy import (
|
|
|
32
30
|
RIGHT_SUBMIT,
|
|
33
31
|
JobPolicy,
|
|
34
32
|
)
|
|
33
|
+
from DIRAC.WorkloadManagementSystem.DB.StatusUtils import kill_delete_jobs
|
|
35
34
|
from DIRAC.WorkloadManagementSystem.Utilities.JobModel import JobDescriptionModel
|
|
36
35
|
from DIRAC.WorkloadManagementSystem.Utilities.ParametricJob import generateParametricJobs, getParameterVectorLength
|
|
36
|
+
from DIRAC.WorkloadManagementSystem.Utilities.Utils import rescheduleJobs
|
|
37
37
|
|
|
38
38
|
MAX_PARAMETRIC_JOBS = 20
|
|
39
39
|
|
|
@@ -104,16 +104,6 @@ class JobManagerHandlerMixin:
|
|
|
104
104
|
return
|
|
105
105
|
self.log.info("Optimize msg sent", f"for {len(jids)} jobs")
|
|
106
106
|
|
|
107
|
-
###########################################################################
|
|
108
|
-
types_getMaxParametricJobs = []
|
|
109
|
-
|
|
110
|
-
def export_getMaxParametricJobs(self):
|
|
111
|
-
"""Get the maximum number of parametric jobs
|
|
112
|
-
|
|
113
|
-
:return: S_OK()/S_ERROR()
|
|
114
|
-
"""
|
|
115
|
-
return S_OK(self.maxParametricJobs)
|
|
116
|
-
|
|
117
107
|
types_submitJob = [str]
|
|
118
108
|
|
|
119
109
|
def export_submitJob(self, jobDesc):
|
|
@@ -345,8 +335,7 @@ class JobManagerHandlerMixin:
|
|
|
345
335
|
types_rescheduleJob = []
|
|
346
336
|
|
|
347
337
|
def export_rescheduleJob(self, jobIDs):
|
|
348
|
-
"""Reschedule a
|
|
349
|
-
it will be used to refresh the proxy in the Proxy Repository
|
|
338
|
+
"""Reschedule a list of jobs.
|
|
350
339
|
|
|
351
340
|
:param list jobIDs: list of job IDs
|
|
352
341
|
|
|
@@ -360,22 +349,12 @@ class JobManagerHandlerMixin:
|
|
|
360
349
|
validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
|
|
361
350
|
jobList, RIGHT_RESCHEDULE
|
|
362
351
|
)
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
self.log.debug(str(result))
|
|
367
|
-
if not result["OK"]:
|
|
368
|
-
return result
|
|
369
|
-
self.jobLoggingDB.addLoggingRecord(
|
|
370
|
-
result["JobID"],
|
|
371
|
-
status=result["Status"],
|
|
372
|
-
minorStatus=result["MinorStatus"],
|
|
373
|
-
applicationStatus="Unknown",
|
|
374
|
-
source="JobManager",
|
|
375
|
-
)
|
|
352
|
+
res = rescheduleJobs(validJobList, source="JobManager")
|
|
353
|
+
if not res["OK"]:
|
|
354
|
+
self.log.error(res["Message"])
|
|
376
355
|
|
|
377
356
|
if invalidJobList or nonauthJobList:
|
|
378
|
-
result = S_ERROR("Some jobs
|
|
357
|
+
result = S_ERROR("Some jobs can not be rescheduled")
|
|
379
358
|
if invalidJobList:
|
|
380
359
|
result["InvalidJobIDs"] = invalidJobList
|
|
381
360
|
if nonauthJobList:
|
|
@@ -450,131 +429,28 @@ class JobManagerHandlerMixin:
|
|
|
450
429
|
|
|
451
430
|
return S_OK(validJobList)
|
|
452
431
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
and remove the pilot that ran and its logging info if the pilot is finished.
|
|
456
|
-
|
|
457
|
-
:param int jobID: job ID
|
|
458
|
-
:return: S_OK()/S_ERROR()
|
|
459
|
-
"""
|
|
460
|
-
if not (result := self.jobDB.setJobStatus(jobID, JobStatus.DELETED, "Checking accounting", force=force))["OK"]:
|
|
461
|
-
return result
|
|
462
|
-
|
|
463
|
-
if not (result := self.taskQueueDB.deleteJob(jobID))["OK"]:
|
|
464
|
-
self.log.warn("Failed to delete job from the TaskQueue")
|
|
465
|
-
|
|
466
|
-
# if it was the last job for the pilot
|
|
467
|
-
result = self.pilotAgentsDB.getPilotsForJobID(jobID)
|
|
468
|
-
if not result["OK"]:
|
|
469
|
-
self.log.error("Failed to get Pilots for JobID", result["Message"])
|
|
470
|
-
return result
|
|
471
|
-
for pilot in result["Value"]:
|
|
472
|
-
res = self.pilotAgentsDB.getJobsForPilot(pilot)
|
|
473
|
-
if not res["OK"]:
|
|
474
|
-
self.log.error("Failed to get jobs for pilot", res["Message"])
|
|
475
|
-
return res
|
|
476
|
-
if not res["Value"]: # if list of jobs for pilot is empty, delete pilot
|
|
477
|
-
result = self.pilotAgentsDB.getPilotInfo(pilotID=pilot)
|
|
478
|
-
if not result["OK"]:
|
|
479
|
-
self.log.error("Failed to get pilot info", result["Message"])
|
|
480
|
-
return result
|
|
481
|
-
ret = self.pilotAgentsDB.deletePilot(result["Value"]["PilotJobReference"])
|
|
482
|
-
if not ret["OK"]:
|
|
483
|
-
self.log.error("Failed to delete pilot from PilotAgentsDB", ret["Message"])
|
|
484
|
-
return ret
|
|
485
|
-
|
|
486
|
-
return S_OK()
|
|
432
|
+
###########################################################################
|
|
433
|
+
types_deleteJob = []
|
|
487
434
|
|
|
488
|
-
def
|
|
489
|
-
"""
|
|
435
|
+
def export_deleteJob(self, jobIDs, force=False):
|
|
436
|
+
"""Delete jobs specified in the jobIDs list
|
|
490
437
|
|
|
491
|
-
:param
|
|
492
|
-
:param bool sendKillCommand: send kill command
|
|
438
|
+
:param list jobIDs: list of job IDs
|
|
493
439
|
|
|
494
|
-
:return: S_OK
|
|
440
|
+
:return: S_OK/S_ERROR
|
|
495
441
|
"""
|
|
496
|
-
if sendKillCommand:
|
|
497
|
-
if not (result := self.jobDB.setJobCommand(jobID, "Kill"))["OK"]:
|
|
498
|
-
return result
|
|
499
|
-
|
|
500
|
-
self.log.info("Job marked for termination", jobID)
|
|
501
|
-
if not (result := self.jobDB.setJobStatus(jobID, JobStatus.KILLED, "Marked for termination", force=force))[
|
|
502
|
-
"OK"
|
|
503
|
-
]:
|
|
504
|
-
self.log.warn("Failed to set job Killed status", result["Message"])
|
|
505
|
-
if not (result := self.taskQueueDB.deleteJob(jobID))["OK"]:
|
|
506
|
-
self.log.warn("Failed to delete job from the TaskQueue", result["Message"])
|
|
507
|
-
|
|
508
|
-
return S_OK()
|
|
509
|
-
|
|
510
|
-
def _kill_delete_jobs(self, jobIDList, right, force=False):
|
|
511
|
-
"""Kill (== set the status to "KILLED") or delete (== set the status to "DELETED") jobs as necessary
|
|
512
|
-
|
|
513
|
-
:param list jobIDList: job IDs
|
|
514
|
-
:param str right: RIGHT_KILL or RIGHT_DELETE
|
|
515
442
|
|
|
516
|
-
|
|
517
|
-
"""
|
|
518
|
-
jobList = self.__getJobList(jobIDList)
|
|
443
|
+
jobList = self.__getJobList(jobIDs)
|
|
519
444
|
if not jobList:
|
|
520
445
|
self.log.warn("No jobs specified")
|
|
521
446
|
return S_OK([])
|
|
522
447
|
|
|
523
|
-
validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
killJobList = []
|
|
528
|
-
deleteJobList = []
|
|
529
|
-
if validJobList:
|
|
530
|
-
# Get the jobs allowed to transition to the Killed state
|
|
531
|
-
filterRes = filterJobStateTransition(validJobList, JobStatus.KILLED)
|
|
532
|
-
if not filterRes["OK"]:
|
|
533
|
-
return filterRes
|
|
534
|
-
killJobList.extend(filterRes["Value"])
|
|
535
|
-
|
|
536
|
-
if not right == RIGHT_KILL:
|
|
537
|
-
# Get the jobs allowed to transition to the Deleted state
|
|
538
|
-
filterRes = filterJobStateTransition(validJobList, JobStatus.DELETED)
|
|
539
|
-
if not filterRes["OK"]:
|
|
540
|
-
return filterRes
|
|
541
|
-
deleteJobList.extend(filterRes["Value"])
|
|
542
|
-
|
|
543
|
-
# Look for jobs that are in the Staging state to send kill signal to the stager
|
|
544
|
-
result = self.jobDB.getJobsAttributes(killJobList, ["Status"])
|
|
545
|
-
if not result["OK"]:
|
|
546
|
-
return result
|
|
547
|
-
stagingJobList = [jobID for jobID, sDict in result["Value"].items() if sDict["Status"] == JobStatus.STAGING]
|
|
548
|
-
|
|
549
|
-
for jobID in killJobList:
|
|
550
|
-
result = self.__killJob(jobID, force=force)
|
|
551
|
-
if not result["OK"]:
|
|
552
|
-
badIDs.append(jobID)
|
|
553
|
-
|
|
554
|
-
for jobID in deleteJobList:
|
|
555
|
-
result = self.__deleteJob(jobID, force=force)
|
|
556
|
-
if not result["OK"]:
|
|
557
|
-
badIDs.append(jobID)
|
|
558
|
-
|
|
559
|
-
if stagingJobList:
|
|
560
|
-
stagerClient = StorageManagerClient()
|
|
561
|
-
self.log.info("Going to send killing signal to stager as well!")
|
|
562
|
-
result = stagerClient.killTasksBySourceTaskID(stagingJobList)
|
|
563
|
-
if not result["OK"]:
|
|
564
|
-
self.log.warn("Failed to kill some Stager tasks", result["Message"])
|
|
448
|
+
validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
|
|
449
|
+
jobList, RIGHT_DELETE
|
|
450
|
+
)
|
|
565
451
|
|
|
566
|
-
|
|
567
|
-
result = S_ERROR("Some jobs failed deletion")
|
|
568
|
-
if nonauthJobList:
|
|
569
|
-
self.log.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
|
|
570
|
-
result["NonauthorizedJobIDs"] = nonauthJobList
|
|
571
|
-
if badIDs:
|
|
572
|
-
self.log.warn("JobIDs failed to be deleted", str(badIDs))
|
|
573
|
-
result["FailedJobIDs"] = badIDs
|
|
574
|
-
return result
|
|
452
|
+
result = kill_delete_jobs(RIGHT_DELETE, validJobList, nonauthJobList, force=force)
|
|
575
453
|
|
|
576
|
-
jobsList = killJobList if right == RIGHT_KILL else deleteJobList
|
|
577
|
-
result = S_OK(jobsList)
|
|
578
454
|
result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
|
|
579
455
|
|
|
580
456
|
if invalidJobList:
|
|
@@ -583,30 +459,33 @@ class JobManagerHandlerMixin:
|
|
|
583
459
|
return result
|
|
584
460
|
|
|
585
461
|
###########################################################################
|
|
586
|
-
|
|
462
|
+
types_killJob = []
|
|
587
463
|
|
|
588
|
-
def
|
|
589
|
-
"""
|
|
464
|
+
def export_killJob(self, jobIDs, force=False):
|
|
465
|
+
"""Kill jobs specified in the jobIDs list
|
|
590
466
|
|
|
591
467
|
:param list jobIDs: list of job IDs
|
|
592
468
|
|
|
593
469
|
:return: S_OK/S_ERROR
|
|
594
470
|
"""
|
|
595
471
|
|
|
596
|
-
|
|
472
|
+
jobList = self.__getJobList(jobIDs)
|
|
473
|
+
if not jobList:
|
|
474
|
+
self.log.warn("No jobs specified")
|
|
475
|
+
return S_OK([])
|
|
597
476
|
|
|
598
|
-
|
|
599
|
-
|
|
477
|
+
validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
|
|
478
|
+
jobList, RIGHT_KILL
|
|
479
|
+
)
|
|
600
480
|
|
|
601
|
-
|
|
602
|
-
"""Kill jobs specified in the jobIDs list
|
|
481
|
+
result = kill_delete_jobs(RIGHT_KILL, validJobList, nonauthJobList, force=force)
|
|
603
482
|
|
|
604
|
-
|
|
483
|
+
result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
|
|
605
484
|
|
|
606
|
-
:
|
|
607
|
-
|
|
485
|
+
if invalidJobList:
|
|
486
|
+
result["InvalidJobIDs"] = invalidJobList
|
|
608
487
|
|
|
609
|
-
return
|
|
488
|
+
return result
|
|
610
489
|
|
|
611
490
|
###########################################################################
|
|
612
491
|
types_resetJob = []
|