DIRAC 9.0.0a68__py3-none-any.whl → 9.0.0a70__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DIRAC/AccountingSystem/Client/Types/Network.py +8 -8
- DIRAC/AccountingSystem/Client/Types/PilotSubmission.py +3 -3
- DIRAC/ConfigurationSystem/Client/CSAPI.py +11 -1
- DIRAC/ConfigurationSystem/Client/Helpers/CSGlobals.py +0 -9
- DIRAC/ConfigurationSystem/Client/Helpers/Registry.py +3 -29
- DIRAC/ConfigurationSystem/Client/SyncPlugins/CERNLDAPSyncPlugin.py +4 -1
- DIRAC/ConfigurationSystem/ConfigTemplate.cfg +3 -0
- DIRAC/ConfigurationSystem/private/Modificator.py +11 -3
- DIRAC/ConfigurationSystem/private/RefresherBase.py +4 -2
- DIRAC/Core/DISET/ServiceReactor.py +11 -3
- DIRAC/Core/DISET/private/Transports/M2SSLTransport.py +9 -7
- DIRAC/Core/Security/DiracX.py +11 -6
- DIRAC/Core/Security/test/test_diracx_token_from_pem.py +161 -0
- DIRAC/Core/Tornado/Server/TornadoService.py +1 -1
- DIRAC/Core/Utilities/ElasticSearchDB.py +1 -2
- DIRAC/Core/Utilities/Subprocess.py +66 -57
- DIRAC/Core/Utilities/test/Test_Profiler.py +20 -20
- DIRAC/Core/Utilities/test/Test_Subprocess.py +58 -8
- DIRAC/Core/scripts/dirac_apptainer_exec.py +8 -8
- DIRAC/DataManagementSystem/Agent/FTS3Agent.py +8 -7
- DIRAC/DataManagementSystem/Client/DataManager.py +6 -7
- DIRAC/DataManagementSystem/Client/FTS3Job.py +125 -34
- DIRAC/DataManagementSystem/Client/test/Test_FTS3Objects.py +1 -0
- DIRAC/DataManagementSystem/Client/test/Test_scitag.py +69 -0
- DIRAC/DataManagementSystem/DB/FileCatalogComponents/DatasetManager/DatasetManager.py +1 -1
- DIRAC/DataManagementSystem/scripts/dirac_dms_create_moving_request.py +2 -0
- DIRAC/FrameworkSystem/DB/InstalledComponentsDB.py +3 -2
- DIRAC/FrameworkSystem/DB/ProxyDB.py +9 -5
- DIRAC/FrameworkSystem/Utilities/MonitoringUtilities.py +1 -0
- DIRAC/FrameworkSystem/Utilities/TokenManagementUtilities.py +3 -2
- DIRAC/FrameworkSystem/Utilities/diracx.py +41 -10
- DIRAC/FrameworkSystem/scripts/dirac_login.py +2 -2
- DIRAC/FrameworkSystem/scripts/dirac_proxy_init.py +1 -1
- DIRAC/FrameworkSystem/scripts/dirac_uninstall_component.py +1 -0
- DIRAC/Interfaces/API/Dirac.py +3 -6
- DIRAC/Interfaces/Utilities/DConfigCache.py +2 -0
- DIRAC/Interfaces/scripts/dirac_wms_job_parameters.py +0 -1
- DIRAC/MonitoringSystem/DB/MonitoringDB.py +6 -5
- DIRAC/MonitoringSystem/Service/WebAppHandler.py +25 -6
- DIRAC/MonitoringSystem/private/MainReporter.py +0 -3
- DIRAC/RequestManagementSystem/Agent/RequestExecutingAgent.py +8 -6
- DIRAC/RequestManagementSystem/ConfigTemplate.cfg +6 -6
- DIRAC/ResourceStatusSystem/Command/FreeDiskSpaceCommand.py +3 -1
- DIRAC/Resources/Computing/AREXComputingElement.py +18 -2
- DIRAC/Resources/Computing/BatchSystems/Condor.py +0 -3
- DIRAC/Resources/Computing/BatchSystems/executeBatch.py +15 -7
- DIRAC/Resources/Computing/LocalComputingElement.py +0 -2
- DIRAC/Resources/Computing/SSHComputingElement.py +61 -38
- DIRAC/Resources/IdProvider/CheckInIdProvider.py +13 -0
- DIRAC/Resources/IdProvider/IdProviderFactory.py +13 -3
- DIRAC/Resources/IdProvider/tests/Test_IdProviderFactory.py +7 -0
- DIRAC/Resources/Storage/FileStorage.py +121 -2
- DIRAC/TransformationSystem/Agent/InputDataAgent.py +4 -1
- DIRAC/TransformationSystem/Agent/MCExtensionAgent.py +5 -2
- DIRAC/TransformationSystem/Agent/TaskManagerAgentBase.py +3 -4
- DIRAC/TransformationSystem/Agent/TransformationCleaningAgent.py +44 -9
- DIRAC/TransformationSystem/Agent/ValidateOutputDataAgent.py +4 -2
- DIRAC/TransformationSystem/Client/TransformationClient.py +9 -1
- DIRAC/TransformationSystem/Client/Utilities.py +6 -3
- DIRAC/TransformationSystem/DB/TransformationDB.py +105 -43
- DIRAC/TransformationSystem/Utilities/ReplicationCLIParameters.py +3 -3
- DIRAC/TransformationSystem/scripts/dirac_production_runjoblocal.py +2 -4
- DIRAC/TransformationSystem/test/Test_replicationTransformation.py +5 -6
- DIRAC/WorkloadManagementSystem/Agent/JobAgent.py +1 -5
- DIRAC/WorkloadManagementSystem/Agent/PilotSyncAgent.py +4 -3
- DIRAC/WorkloadManagementSystem/Agent/PushJobAgent.py +0 -4
- DIRAC/WorkloadManagementSystem/Agent/SiteDirector.py +8 -11
- DIRAC/WorkloadManagementSystem/Agent/StalledJobAgent.py +39 -7
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_SiteDirector.py +8 -2
- DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_StalledJobAgent.py +24 -4
- DIRAC/WorkloadManagementSystem/Client/DownloadInputData.py +4 -3
- DIRAC/WorkloadManagementSystem/ConfigTemplate.cfg +3 -3
- DIRAC/WorkloadManagementSystem/DB/JobParametersDB.py +8 -8
- DIRAC/WorkloadManagementSystem/DB/SandboxMetadataDB.py +1 -1
- DIRAC/WorkloadManagementSystem/DB/StatusUtils.py +48 -21
- DIRAC/WorkloadManagementSystem/DB/tests/Test_StatusUtils.py +19 -4
- DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +3 -4
- DIRAC/WorkloadManagementSystem/JobWrapper/Watchdog.py +16 -45
- DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +18 -9
- DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py +25 -2
- DIRAC/WorkloadManagementSystem/Service/WMSAdministratorHandler.py +18 -31
- DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py +73 -7
- {dirac-9.0.0a68.dist-info → dirac-9.0.0a70.dist-info}/METADATA +6 -5
- {dirac-9.0.0a68.dist-info → dirac-9.0.0a70.dist-info}/RECORD +88 -86
- {dirac-9.0.0a68.dist-info → dirac-9.0.0a70.dist-info}/WHEEL +0 -0
- {dirac-9.0.0a68.dist-info → dirac-9.0.0a70.dist-info}/entry_points.txt +0 -0
- {dirac-9.0.0a68.dist-info → dirac-9.0.0a70.dist-info}/licenses/LICENSE +0 -0
- {dirac-9.0.0a68.dist-info → dirac-9.0.0a70.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ from DIRAC import gLogger
 from DIRAC.Core.Base.Script import Script
 from DIRAC.Core.Utilities.File import secureOpenForWrite
 from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData
+from DIRAC.ConfigurationSystem.Client.Helpers.Registry import reset_all_caches


 class ConfigCache:
@@ -69,5 +70,6 @@ class ConfigCache:
         try:
             with open(self.configCacheName, "rb") as fh:
                 gConfigurationData.mergedCFG = pickle.load(fh)
+                reset_all_caches()
         except:
             gLogger.error("Cache corrupt or unreadable")
@@ -5,8 +5,7 @@ Wrapper on top of ElasticDB. It is used to manage the DIRAC monitoring types.

 The following option can be set in `Systems/Monitoring/Databases/MonitoringDB`

-* *IndexPrefix*: Prefix used to prepend to indexes created in the
-  is not present in the CS, the indexes are prefixed with the setup name.
+* *IndexPrefix*: Prefix used to prepend to indexes created in the OpenSearch instance.

 For each monitoring types managed, the Period (how often a new index is created)
 can be defined with::
@@ -33,7 +32,6 @@ import time

 from DIRAC import S_ERROR, S_OK
 from DIRAC.ConfigurationSystem.Client.Config import gConfig
-from DIRAC.ConfigurationSystem.Client.Helpers import CSGlobals
 from DIRAC.ConfigurationSystem.Client.PathFinder import getDatabaseSection
 from DIRAC.Core.Base.ElasticDB import ElasticDB
 from DIRAC.Core.Utilities.Plotting.TypeLoader import TypeLoader
@@ -48,7 +46,7 @@ class MonitoringDB(ElasticDB):

         try:
             section = getDatabaseSection("Monitoring/MonitoringDB")
-            indexPrefix = gConfig.getValue(f"{section}/IndexPrefix",
+            indexPrefix = gConfig.getValue(f"{section}/IndexPrefix", "").lower()
             # Connecting to the ES cluster
             super().__init__(fullName=name, indexPrefix=indexPrefix)
         except RuntimeError as ex:
@@ -192,7 +190,10 @@ class MonitoringDB(ElasticDB):
         # and now we group with bucket aggregation
         groupingAggregation = self._A("terms", field=grouping, size=self.RESULT_SIZE)
         groupingAggregation.bucket(
-            "end_data",
+            "end_data",
+            "date_histogram",
+            field="timestamp",
+            interval=interval,
         ).metric("timeAggregation", timeAggregation).pipeline(
             "timeAggregation_avg_bucket", "avg_bucket", buckets_path="timeAggregation>total", gap_policy="insert_zeros"
         )
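For orientation, assuming elasticsearch-dsl semantics for `_A`/`bucket`/`metric`/`pipeline` (an assumption, not shown in this diff), the chain above should build an aggregation body roughly like:

    # Rough sketch of the resulting OpenSearch aggregation (names taken from the code above)
    {
        "terms": {"field": grouping, "size": RESULT_SIZE},
        "aggs": {
            "end_data": {
                "date_histogram": {"field": "timestamp", "interval": interval},
                "aggs": {
                    "timeAggregation": {...},  # the metric aggregation passed in
                    "timeAggregation_avg_bucket": {
                        "avg_bucket": {"buckets_path": "timeAggregation>total", "gap_policy": "insert_zeros"}
                    },
                },
            }
        },
    }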
@@ -2,6 +2,7 @@
 The WebAppHandler module provides a class to handle web requests from the DIRAC WebApp.
 It is not indented to be used in diracx
 """
+
 from DIRAC import S_ERROR, S_OK
 from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
 from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getSites
@@ -28,19 +29,29 @@ class WebAppHandler(RequestHandler):
             result = ObjectLoader().loadObject("WorkloadManagementSystem.DB.PilotAgentsDB", "PilotAgentsDB")
             if not result["OK"]:
                 return result
-            cls.pilotAgentsDB = result["Value"](parentLogger=cls.log)
+            try:
+                cls.pilotAgentsDB = result["Value"](parentLogger=cls.log)
+            except RuntimeError:
+                cls.log.warn("Could not connect to PilotAgentsDB")

             result = ObjectLoader().loadObject("WorkloadManagementSystem.DB.JobDB", "JobDB")
             if not result["OK"]:
                 return result
-            cls.jobDB = result["Value"](parentLogger=cls.log)
+            try:
+                cls.jobDB = result["Value"](parentLogger=cls.log)
+            except RuntimeError:
+                cls.log.warn("Could not connect to JobDB")

             result = ObjectLoader().loadObject("TransformationSystem.DB.TransformationDB", "TransformationDB")
             if not result["OK"]:
                 return result
-            cls.transformationDB = result["Value"](parentLogger=cls.log)
+            try:
+                cls.transformationDB = result["Value"](parentLogger=cls.log)
+            except RuntimeError:
+                cls.log.warn("Could not connect to TransformationDB")

         except RuntimeError as excp:
+            cls.log.exception()
             return S_ERROR(f"Can't connect to DB: {excp}")

         return S_OK()
@@ -517,13 +528,21 @@ class WebAppHandler(RequestHandler):
         ops = Operations()
         # Prepare the standard structure now within the resultDict dictionary
         resultDict = {}
-
+        # Reconstruct just the values list
+        trList = [
+            [str(item) if not isinstance(item, int) else item for item in trans_dict.values()]
+            for trans_dict in res["Value"]
+        ]
+
         # Create the total records entry
         nTrans = len(trList)
         resultDict["TotalRecords"] = nTrans
         # Create the ParameterNames entry
-
-
+        try:
+            resultDict["ParameterNames"] = list(res["Value"][0].keys())
+        except IndexError:
+            # As this list is a reference to the list in the DB, we cannot extend it, therefore copy it
+            resultDict["ParameterNames"] = list(cls.transformationDB.TRANSPARAMS)
         # Add the job states to the ParameterNames entry
         taskStateNames = TASKS_STATE_NAMES + ops.getValue("Transformations/AdditionalTaskStates", [])
         resultDict["ParameterNames"] += ["Jobs_" + x for x in taskStateNames]
@@ -5,7 +5,6 @@ import hashlib
 import re

 from DIRAC import S_OK, S_ERROR, gConfig
-from DIRAC.ConfigurationSystem.Client.Helpers import CSGlobals
 from DIRAC.ConfigurationSystem.Client.PathFinder import getServiceSection
 from DIRAC.MonitoringSystem.private.Plotters.BasePlotter import BasePlotter as myBasePlotter
 from DIRAC.Core.Utilities.ObjectLoader import loadObjects
@@ -56,7 +55,6 @@ class MainReporter:
         :param str setup: DIRAC setup
         """
         self.__db = db
-        self.__setup = CSGlobals.getSetup().lower()
         self.__csSection = getServiceSection("Monitoring/Monitoring")
         self.__plotterList = PlottersList()

@@ -75,7 +73,6 @@ class MainReporter:
             requestToHash[key] = epoch - epoch % granularity
         md5Hash = hashlib.md5()
         md5Hash.update(repr(requestToHash).encode())
-        md5Hash.update(self.__setup.encode())
         return md5Hash.hexdigest()

     def generate(self, reportRequest):
@@ -48,13 +48,13 @@ from DIRAC.RequestManagementSystem.private.RequestTask import RequestTask
 # # agent name
 AGENT_NAME = "RequestManagement/RequestExecutingAgent"
 # # requests/cycle
-REQUESTSPERCYCLE =
+REQUESTSPERCYCLE = 300
 # # minimal nb of subprocess running
-MINPROCESS =
+MINPROCESS = 50
 # # maximal nb of subprocess executed same time
-MAXPROCESS =
+MAXPROCESS = 50
 # # ProcessPool queue size
-QUEUESIZE =
+QUEUESIZE = 100
 # # file timeout
 FILETIMEOUT = 300
 # # operation timeout
@@ -62,7 +62,9 @@ OPERATIONTIMEOUT = 300
 # # ProcessPool finalization timeout
 POOLTIMEOUT = 900
 # # ProcessPool sleep time
-POOLSLEEP =
+POOLSLEEP = 1
+# # Fetch multiple requests at once from the DB. Otherwise, one by one
+BULKREQUEST = 300


 class AgentConfigError(Exception):
@@ -108,7 +110,7 @@ class RequestExecutingAgent(AgentModule):
         self.__poolSleep = POOLSLEEP
         self.__requestClient = None
         # Size of the bulk if use of getRequests. If 0, use getRequest
-        self.__bulkRequest =
+        self.__bulkRequest = BULKREQUEST
         self.__rmsMonitoring = False

     def processPool(self):
@@ -44,19 +44,19 @@ Agents
     {
       PollingTime = 60
       # number of Requests to execute per cycle
-      RequestsPerCycle =
+      RequestsPerCycle = 300
       # minimum number of workers process in the ProcessPool
-      MinProcess =
+      MinProcess = 50
       # maximum number of workers process in the ProcessPool; recommended to set it to the same value as MinProcess
-      MaxProcess =
+      MaxProcess = 50
       # queue depth of the ProcessPool
-      ProcessPoolQueueSize =
+      ProcessPoolQueueSize = 100
       # timeout for the ProcessPool finalization
       ProcessPoolTimeout = 900
       # sleep time before retrying to get a free slot in the ProcessPool
-      ProcessPoolSleep =
+      ProcessPoolSleep = 1
       # If a positive integer n is given, we fetch n requests at once from the DB. Otherwise, one by one
-      BulkRequest =
+      BulkRequest = 300
       OperationHandlers
       {
         ForwardDISET
@@ -121,7 +121,9 @@ class FreeDiskSpaceCommand(Command):
                 "Site": siteRes["Value"] if siteRes["Value"] else "unassigned",
             }

-            results["Used"] = results["Total"] - results["Free"]
+            # There are sometimes small discrepencies which can lead to negative
+            # used values.
+            results["Used"] = max(0, results["Total"] - results["Free"])

             for sType in ["Total", "Free", "Used"]:
                 spaceTokenAccounting = StorageOccupancy()
@@ -1,4 +1,4 @@
-"""
+"""AREX Computing Element (ARC REST interface)

 Allows interacting with ARC AREX services via a REST interface.

@@ -807,7 +807,23 @@ class AREXComputingElement(ComputingElement):
             return S_ERROR(f"Failed decoding the status of the CE")

         # Look only in the relevant section out of the headache
-
+        # This "safe_get" function allows to go down the dictionary
+        # even if some elements are lists instead of dictionaries
+        # and returns None if any element is not found
+        # FIXME: this is a temporary measure to be removed after https://github.com/DIRACGrid/DIRAC/issues/8354
+        def safe_get(d, *keys):
+            for k in keys:
+                if isinstance(d, list):
+                    d = d[0]  # assume first element
+                d = d.get(k) if isinstance(d, dict) else None
+                if d is None:
+                    break
+            return d
+
+        queueInfo = safe_get(ceData, "Domains", "AdminDomain", "Services", "ComputingService", "ComputingShare")
+        if queueInfo is None:
+            self.log.error("Failed to extract queue info")
+
         if not isinstance(queueInfo, list):
             queueInfo = [queueInfo]

@@ -203,9 +203,6 @@ class Condor(object):
             resultDict["Jobs"] = []
             for i in range(submittedJobs):
                 resultDict["Jobs"].append(".".join([cluster, str(i)]))
-            # Executable is transferred afterward
-            # Inform the caller that Condor cannot delete it before the end of the execution
-            resultDict["ExecutableToKeep"] = executable
         else:
             resultDict["Status"] = status
             resultDict["Message"] = error
@@ -35,8 +35,10 @@ if __name__ == "__main__":
     from urllib.parse import unquote as urlunquote


-
-
+    # Read options from JSON file
+    optionsFilePath = sys.argv[1]
+    with open(optionsFilePath, 'r') as f:
+        inputDict = json.load(f)

     method = inputDict.pop('Method')
     batchSystem = inputDict.pop('BatchSystem')
@@ -45,9 +47,15 @@ if __name__ == "__main__":
     try:
         result = getattr(batch, method)(**inputDict)
     except Exception:
-
-
-
-
-
+        # Wrap the traceback in a proper error structure
+        result = {
+            'Status': -1,
+            'Message': 'Exception during batch method execution',
+            'Traceback': traceback.format_exc()
+        }
+
+    # Write result to JSON file
+    resultFilePath = optionsFilePath.replace('.json', '_result.json')
+    with open(resultFilePath, 'w') as f:
+        json.dump(result, f)
 """
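Taken together with the SSHComputingElement changes below, the submission protocol is now file-based: the CE serialises the call as JSON, ships it to the remote host, and execute_batch answers in a sibling `_result.json` file. A minimal sketch of the caller side, with hypothetical paths and values (only the `Method`/`BatchSystem` keys and the `.json` -> `_result.json` naming are taken from this diff):

    # Hypothetical illustration of the options file consumed by execute_batch
    import json

    options = {
        "Method": "submitJob",      # dispatched via getattr(batch, method)(**inputDict)
        "BatchSystem": "Condor",    # selects the batch system class
        # any remaining keys are forwarded as keyword arguments to the method
    }
    with open("/tmp/batch_options_abc123.json", "w") as f:
        json.dump(options, f)

    # execute_batch then writes its outcome next to the input file:
    # /tmp/batch_options_abc123_result.json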
@@ -182,8 +182,6 @@ class LocalComputingElement(ComputingElement):
             batchSystemName = self.batchSystem.__class__.__name__.lower()
             jobIDs = ["ssh" + batchSystemName + "://" + self.ceName + "/" + _id for _id in resultSubmit["Jobs"]]
             result = S_OK(jobIDs)
-            if "ExecutableToKeep" in resultSubmit:
-                result["ExecutableToKeep"] = resultSubmit["ExecutableToKeep"]
         else:
             result = S_ERROR(resultSubmit["Message"])

@@ -67,9 +67,10 @@ import json
 import os
 import shutil
 import stat
+import tempfile
 import uuid
 from shlex import quote as shlex_quote
-from urllib.parse import
+from urllib.parse import urlparse

 import pexpect

@@ -484,47 +485,69 @@ class SSHComputingElement(ComputingElement):
         options["User"] = self.user
         options["Queue"] = self.queue

-
-
+        localOptionsFile = None
+        remoteOptionsFile = None
+        localResultFile = None
+        remoteResultFile = None
+        try:
+            # Write options to a local temporary file
+            with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+                json.dump(options, f)
+                localOptionsFile = f.name
+
+            # Upload the options file to the remote host
+            remoteOptionsFile = f"{self.sharedArea}/batch_options_{uuid.uuid4().hex}.json"
+            result = ssh.scpCall(30, localOptionsFile, remoteOptionsFile)
+            if not result["OK"]:
+                return result

-
-
-
-
+            # Execute the batch command with the options file path
+            cmd = (
+                f"bash --login -c 'python3 {self.sharedArea}/execute_batch {remoteOptionsFile} || "
+                f"python {self.sharedArea}/execute_batch {remoteOptionsFile} || "
+                f"python2 {self.sharedArea}/execute_batch {remoteOptionsFile}'"
+            )

-
+            self.log.verbose(f"CE submission command: {cmd}")

-
-
-
-
+            result = ssh.sshCall(120, cmd)
+            if not result["OK"]:
+                self.log.error(f"{self.ceType} CE job submission failed", result["Message"])
+                return result

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            sshStatus = result["Value"][0]
+            if sshStatus != 0:
+                sshStdout = result["Value"][1]
+                sshStderr = result["Value"][2]
+                return S_ERROR(f"CE job submission command failed with status {sshStatus}: {sshStdout} {sshStderr}")
+
+            # The result should be written to a JSON file by execute_batch
+            # Compute the expected result file path
+            remoteResultFile = remoteOptionsFile.replace(".json", "_result.json")
+
+            # Try to download the result file
+            with tempfile.NamedTemporaryFile(mode="r", suffix=".json", delete=False) as f:
+                localResultFile = f.name
+
+            result = ssh.scpCall(30, localResultFile, remoteResultFile, upload=False)
+            if not result["OK"]:
+                return result
+
+            # Read the result from the downloaded file
+            with open(localResultFile) as f:
+                result = json.load(f)
+            return S_OK(result)
+        finally:
+            # Clean up local temporary file
+            if localOptionsFile and os.path.exists(localOptionsFile):
+                os.remove(localOptionsFile)
+            if localResultFile and os.path.exists(localResultFile):
+                os.remove(localResultFile)
+            # Clean up remote temporary files
+            if remoteOptionsFile:
+                ssh.sshCall(30, f"rm -f {remoteOptionsFile}")
+            if remoteResultFile:
+                ssh.sshCall(30, f"rm -f {remoteResultFile}")

     def submitJob(self, executableFile, proxy, numberOfJobs=1):
         # self.log.verbose( "Executable file path: %s" % executableFile )
@@ -26,3 +26,16 @@ class CheckInIdProvider(OAuth2IdProvider):

         idPScope = f"eduperson_entitlement?value=urn:mace:egi.eu:group:{vo}:role={groupElements[1]}#aai.egi.eu"
         return scope_to_list(idPScope)
+
+    def fetchToken(self, **kwargs):
+        """Fetch token
+
+        :param kwargs:
+        :return: dict
+        """
+
+        if "audience" in kwargs:
+            kwargs["resource"] = kwargs["audience"]
+            kwargs.pop("audience")
+
+        return super().fetchToken(**kwargs)
@@ -1,6 +1,7 @@
-"""
-
+"""The Identity Provider Factory instantiates IdProvider objects
+according to their configuration
 """
+
 import jwt

 from DIRAC import S_OK, S_ERROR, gLogger, gConfig
@@ -40,11 +41,12 @@ class IdProviderFactory:
             return result
         return self.getIdProvider(result["Value"])

-    def getIdProvider(self, name, **kwargs):
+    def getIdProvider(self, name, client_name_prefix="", **kwargs):
         """This method returns a IdProvider instance corresponding to the supplied
         name.

         :param str name: the name of the Identity Provider client
+        :param str client_name_prefix: name of the client of the IdP

         :return: S_OK(IdProvider)/S_ERROR()
         """
@@ -68,8 +70,16 @@ class IdProviderFactory:
         if not result["OK"]:
             self.log.error("Failed to read configuration", f"{name}: {result['Message']}")
             return result
+
         pDict = result["Value"]

+        if client_name_prefix:
+            client_name_prefix = client_name_prefix + "_"
+        if f"{client_name_prefix}client_id" in pDict:
+            pDict["client_id"] = pDict[f"{client_name_prefix}client_id"]
+        if f"{client_name_prefix}client_secret" in pDict:
+            pDict["client_secret"] = pDict[f"{client_name_prefix}client_secret"]
+
         pDict.update(kwargs)
         pDict["ProviderName"] = name

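In practice this means a single IdP section can carry several client credentials distinguished by prefix. A hedged usage sketch (the provider and prefix names here are invented for illustration):

    # Hypothetical call: if the CS section for "SomeIdP" contains
    # Pilot_client_id / Pilot_client_secret, those override client_id / client_secret
    result = IdProviderFactory().getIdProvider("SomeIdP", client_name_prefix="Pilot")
    if result["OK"]:
        idProvider = result["Value"]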
@@ -63,6 +63,12 @@ Resources
         client_secret = IdP_client_secret
         scope = openid+profile+offline_access+eduperson_entitlement
       }
+      SomeIdP3.partial
+      {
+        ProviderType = OAuth2
+        issuer = https://and-another-idp.url/
+        scope = openid+profile+offline_access+eduperson_entitlement
+      }
     }
   }
 """
@@ -77,6 +83,7 @@ idps = IdProviderFactory()
         ("SomeIdP1.2", {"OK": True}, "https://idp.url/", "IdP_client_id2", "IdP_client_secret"),
         ("SomeIdP2", {"OK": True}, "https://another-idp.url/", "IdP_client_id1", "IdP_client_secret"),
         ("SomeIdP3", {"OK": True}, "https://and-another-idp.url/", "IdP_client_id3", "IdP_client_secret"),
+        ("SomeIdP3.partial", {"OK": True}, "https://and-another-idp.url/", None, None),
         # Try to get an unknown DIRAC client
         ("DIRACUnknown", {"OK": False, "Message": "DIRACUnknown does not exist"}, None, None, None),
     ],
@@ -1,10 +1,13 @@
 """
-
-
+This is the File StorageClass, only meant to be used localy
+"""
+
 import os
 import shutil
 import errno
 import stat
+import struct
+import time

 from DIRAC import gLogger, S_OK, S_ERROR
 from DIRAC.Resources.Storage.Utilities import checkArgumentFormat
@@ -12,6 +15,116 @@ from DIRAC.Resources.Storage.StorageBase import StorageBase
 from DIRAC.Core.Utilities.Adler import fileAdler


+def set_xattr_adler32(path, checksum):
+    """
+    Set the adler32 checksum extended attribute on a file.
+
+    This is needed for case where you write the data on a locally mounted
+    file system, but then want to access it from outside via xroot (like the HLT farm)
+
+    Hopefully, this whole function will be part of xroot at some point
+    https://github.com/xrootd/xrootd/pull/2650
+
+
+    This function replicates the exact behavior of the C++ function fSetXattrAdler32
+
+    It writes the checksum in XrdCksData binary format with the following structure:
+    - Name[16]: Algorithm name ("adler32"), null-padded
+    - fmTime (8): File modification time (network byte order, int64)
+    - csTime (4): Time delta from mtime (network byte order, int32)
+    - Rsvd1 (2): Reserved (int16)
+    - Rsvd2 (1): Reserved (uint8)
+    - Length (1): Checksum length in bytes (uint8)
+    - Value[64]: Binary checksum value (4 bytes for adler32)
+
+    Total structure size: 96 bytes
+
+    Parameters
+    ----------
+    path : str
+        Path to the file (must be a regular file on local filesystem)
+    checksum : str
+        8-character hexadecimal adler32 checksum (e.g., "deadbeef")
+
+
+    Notes
+    -----
+    - The attribute is stored as "user.XrdCks.adler32"
+
+    """
+    # Validate checksum format
+    if not isinstance(checksum, str) or len(checksum) != 8:
+        raise ValueError("Checksum must be exactly 8 characters")
+
+    # Validate it's valid hex
+    try:
+        int(checksum, 16)
+    except ValueError:
+        raise ValueError(f"Checksum must be valid hexadecimal: {checksum}")
+
+    # Check file exists and is regular
+
+    st = os.stat(path)
+
+    # Import xattr module
+    try:
+        import xattr
+    except ImportError:
+        raise ImportError("The 'xattr' module is required. Install it with: pip install xattr")
+
+    # Build XrdCksData structure (96 bytes total)
+    # Reference: src/XrdCks/XrdCksData.hh
+
+    # 1. Name[16] - Algorithm name, null-padded
+    name = b"adler32"
+    name_field = name.ljust(16, b"\x00")
+
+    # 2. fmTime (8 bytes) - File modification time (network byte order = big-endian)
+    fm_time = int(st.st_mtime)
+    fm_time_field = struct.pack(">q", fm_time)  # signed 64-bit big-endian
+
+    # 3. csTime (4 bytes) - Delta from mtime to now (network byte order)
+    cs_time = int(time.time()) - fm_time
+    cs_time_field = struct.pack(">i", cs_time)  # signed 32-bit big-endian
+
+    # 4. Rsvd1 (2 bytes) - Reserved, set to 0
+    rsvd1_field = struct.pack(">h", 0)  # signed 16-bit big-endian
+
+    # 5. Rsvd2 (1 byte) - Reserved, set to 0
+    rsvd2_field = struct.pack("B", 0)  # unsigned 8-bit
+
+    # 6. Length (1 byte) - Checksum length in bytes
+    # Adler32 is 4 bytes (8 hex chars / 2)
+    length_field = struct.pack("B", 4)  # unsigned 8-bit
+
+    # 7. Value[64] - Binary checksum value
+    # Convert hex string to 4 bytes, pad rest with zeros
+    checksum_bytes = bytes.fromhex(checksum)
+    value_field = checksum_bytes + b"\x00" * (64 - len(checksum_bytes))
+
+    # Assemble complete structure
+    xrd_cks_data = (
+        name_field  # 16 bytes
+        + fm_time_field  # 8 bytes
+        + cs_time_field  # 4 bytes
+        + rsvd1_field  # 2 bytes
+        + rsvd2_field  # 1 byte
+        + length_field  # 1 byte
+        + value_field  # 64 bytes
+    )  # Total: 96 bytes
+
+    assert len(xrd_cks_data) == 96, f"Structure size mismatch: {len(xrd_cks_data)}"
+
+    # Set the extended attribute
+    # XRootD uses "XrdCks.adler32" which becomes "user.XrdCks.adler32" on Linux
+    attr_name = "user.XrdCks.adler32"
+
+    try:
+        xattr.setxattr(path, attr_name, xrd_cks_data)
+    except OSError as e:
+        raise OSError(f"Failed to set extended attribute on {path}: {e}") from e
+
+
 class FileStorage(StorageBase):
     """.. class:: FileStorage

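For reference, the 96-byte blob documented above can be decoded symmetrically. A minimal sketch (not part of the package; it assumes the same `xattr` module and the layout described in the docstring, and `read_xattr_adler32` is a hypothetical name):

    import struct
    import xattr

    def read_xattr_adler32(path):
        """Decode the XrdCksData blob written by set_xattr_adler32."""
        raw = xattr.getxattr(path, "user.XrdCks.adler32")
        # >16sqihBB64s = Name[16], fmTime, csTime, Rsvd1, Rsvd2, Length, Value[64] (96 bytes)
        name, fm_time, cs_time, rsvd1, rsvd2, length, value = struct.unpack(">16sqihBB64s", raw)
        assert name.rstrip(b"\x00") == b"adler32"
        return value[:length].hex()  # e.g. "deadbeef"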
@@ -165,6 +278,12 @@ class FileStorage(StorageBase):
                 os.makedirs(dirname)
             shutil.copy2(src_file, dest_url)
             fileSize = os.path.getsize(dest_url)
+            try:
+                src_cks = fileAdler(src_file)
+                set_xattr_adler32(dest_url, src_cks)
+            except Exception as e:
+                gLogger.warn("Could not set checksum", f"{e!r}")
+
             if sourceSize and (sourceSize != fileSize):
                 try:
                     os.unlink(dest_url)