DIRAC 9.0.14__py3-none-any.whl → 9.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DIRAC/ConfigurationSystem/Client/CSAPI.py +11 -0
- DIRAC/Core/Tornado/Client/private/TornadoBaseClient.py +1 -1
- DIRAC/Core/Utilities/CGroups2.py +1 -0
- DIRAC/Core/Utilities/ElasticSearchDB.py +1 -1
- DIRAC/Core/Utilities/MySQL.py +51 -25
- DIRAC/DataManagementSystem/Client/DataManager.py +7 -10
- DIRAC/DataManagementSystem/Client/FTS3Job.py +12 -3
- DIRAC/FrameworkSystem/Service/SystemAdministratorHandler.py +41 -11
- DIRAC/Interfaces/API/Dirac.py +12 -4
- DIRAC/Interfaces/API/Job.py +62 -17
- DIRAC/RequestManagementSystem/private/RequestTask.py +2 -1
- DIRAC/Resources/Catalog/FileCatalogClient.py +18 -7
- DIRAC/Resources/Catalog/Utilities.py +3 -3
- DIRAC/Resources/Computing/BatchSystems/SLURM.py +1 -1
- DIRAC/Resources/Computing/BatchSystems/TimeLeft/TimeLeft.py +3 -1
- DIRAC/Resources/Computing/ComputingElement.py +39 -34
- DIRAC/Resources/Computing/InProcessComputingElement.py +20 -7
- DIRAC/Resources/Computing/PoolComputingElement.py +76 -37
- DIRAC/Resources/Computing/SingularityComputingElement.py +19 -9
- DIRAC/Resources/Computing/test/Test_InProcessComputingElement.py +69 -8
- DIRAC/Resources/Computing/test/Test_PoolComputingElement.py +102 -35
- DIRAC/Resources/Storage/GFAL2_StorageBase.py +9 -0
- DIRAC/TransformationSystem/Agent/TransformationAgent.py +12 -13
- DIRAC/WorkloadManagementSystem/Client/JobReport.py +10 -6
- DIRAC/WorkloadManagementSystem/Client/JobState/JobState.py +12 -3
- DIRAC/WorkloadManagementSystem/Client/Matcher.py +18 -24
- DIRAC/WorkloadManagementSystem/DB/TaskQueueDB.py +137 -7
- DIRAC/WorkloadManagementSystem/Executor/JobScheduling.py +8 -14
- DIRAC/WorkloadManagementSystem/Executor/test/Test_Executor.py +3 -5
- DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py +2 -2
- DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py +1 -1
- DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py +7 -1
- DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py +81 -2
- DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py +7 -6
- DIRAC/WorkloadManagementSystem/Utilities/QueueUtilities.py +5 -5
- DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py +2 -1
- DIRAC/WorkloadManagementSystem/Utilities/Utils.py +21 -4
- DIRAC/WorkloadManagementSystem/Utilities/test/Test_RemoteRunner.py +7 -3
- DIRAC/WorkloadManagementSystem/scripts/dirac_wms_get_wn_parameters.py +3 -3
- DIRAC/__init__.py +1 -1
- DIRAC/tests/Utilities/testJobDefinitions.py +57 -20
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/METADATA +2 -2
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/RECORD +47 -47
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/WHEEL +0 -0
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/entry_points.txt +0 -0
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/licenses/LICENSE +0 -0
- {dirac-9.0.14.dist-info → dirac-9.0.16.dist-info}/top_level.txt +0 -0
DIRAC/WorkloadManagementSystem/DB/TaskQueueDB.py CHANGED

@@ -1,5 +1,5 @@
-"""
-
+"""TaskQueueDB class is a front-end to the task queues db"""
+
 import random
 import string
 from collections import defaultdict
@@ -22,12 +22,13 @@ TQ_MIN_SHARE = 0.001
 # For checks at insertion time, and not only
 singleValueDefFields = ("Owner", "OwnerGroup", "CPUTime")
 multiValueDefFields = ("Sites", "GridCEs", "BannedSites", "Platforms", "JobTypes", "Tags")
+rangeValueDefFields = ("MinRAM", "MaxRAM")

 # Used for matching
 multiValueMatchFields = ("GridCE", "Site", "Platform", "JobType", "Tag")
 bannedJobMatchFields = ("Site",)
 mandatoryMatchFields = ("CPUTime",)
-priorityIgnoredFields = ("Sites", "BannedSites")
+priorityIgnoredFields = ("Sites", "BannedSites", "MinRAM", "MaxRAM")


 def _lowerAndRemovePunctuation(s):
@@ -129,6 +130,16 @@ class TaskQueueDB(DB):
             "ForeignKeys": {"TQId": "tq_TaskQueues.TQId"},
         }

+        self.__tablesDesc["tq_RAM_requirements"] = {
+            "Fields": {
+                "TQId": "INTEGER(11) UNSIGNED NOT NULL",
+                "MinRAM": "INTEGER UNSIGNED NOT NULL DEFAULT 0",
+                "MaxRAM": "INTEGER UNSIGNED NOT NULL DEFAULT 0",
+            },
+            "PrimaryKey": "TQId",
+            "ForeignKeys": {"TQId": "tq_TaskQueues.TQId"},
+        }
+
         for multiField in multiValueDefFields:
             tableName = f"tq_TQTo{multiField}"
             self.__tablesDesc[tableName] = {
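The table description above is consumed by DIRAC's generic MySQL table-creation machinery. As a rough orientation only, it corresponds to something like the DDL below; this is a sketch, and the exact statement the DB layer emits (engine, index details) may differ.

# Sketch only: approximate DDL implied by the tq_RAM_requirements description above.
EXPECTED_DDL_SKETCH = """
CREATE TABLE `tq_RAM_requirements` (
  `TQId` INTEGER(11) UNSIGNED NOT NULL,
  `MinRAM` INTEGER UNSIGNED NOT NULL DEFAULT 0,
  `MaxRAM` INTEGER UNSIGNED NOT NULL DEFAULT 0,
  PRIMARY KEY (`TQId`),
  FOREIGN KEY (`TQId`) REFERENCES `tq_TaskQueues` (`TQId`)
)
"""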
@@ -206,6 +217,20 @@ class TaskQueueDB(DB):
                 return result
             tqDefDict[field] = result["Value"]

+        # Check range value fields (RAM requirements)
+        for field in rangeValueDefFields:
+            if field not in tqDefDict:
+                continue
+            if not isinstance(tqDefDict[field], int):
+                return S_ERROR(f"Range value field {field} value type is not valid: {type(tqDefDict[field])}")
+            if tqDefDict[field] < 0:
+                return S_ERROR(f"Range value field {field} must be non-negative: {tqDefDict[field]}")
+
+        # Validate that MinRAM <= MaxRAM if both are specified
+        if "MinRAM" in tqDefDict and "MaxRAM" in tqDefDict:
+            if tqDefDict["MaxRAM"] > 0 and tqDefDict["MinRAM"] > tqDefDict["MaxRAM"]:
+                return S_ERROR(f"MinRAM ({tqDefDict['MinRAM']}) cannot be greater than MaxRAM ({tqDefDict['MaxRAM']})")
+
         return S_OK(tqDefDict)

     def _checkMatchDefinition(self, tqMatchDict):
@@ -251,6 +276,13 @@ class TaskQueueDB(DB):
                 return S_ERROR(f"Match definition field {field} failed : {result['Message']}")
             tqMatchDict[field] = result["Value"]

+        # Check range value fields (RAM requirements for matching)
+        if "MaxRAM" in tqMatchDict:
+            result = travelAndCheckType(tqMatchDict["MaxRAM"], int, escapeValues=False)
+            if not result["OK"]:
+                return S_ERROR(f"Match definition field RAM failed : {result['Message']}")
+            tqMatchDict["MaxRAM"] = result["Value"]
+
         return S_OK(tqMatchDict)

     def __createTaskQueue(self, tqDefDict, priority=1, connObj=False):
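As a toy illustration of what the new definition checks accept and reject (the dictionaries below are invented examples, not taken from the code):

# Invented examples for the new range-field validation in _checkTaskQueueDefinition:
ok_def = {"Owner": "alice", "OwnerGroup": "some_group", "CPUTime": 86400, "MinRAM": 2048}
# -> accepted: MinRAM is a non-negative int; a missing MaxRAM simply defaults to 0 on insertion
bad_def = {"Owner": "alice", "OwnerGroup": "some_group", "CPUTime": 86400, "MinRAM": 4096, "MaxRAM": 2048}
# -> rejected with "MinRAM (4096) cannot be greater than MaxRAM (2048)"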
@@ -303,6 +335,20 @@ class TaskQueueDB(DB):
                 self.log.error("Failed to insert condition", f"{field} : {result['Message']}")
                 self.cleanOrphanedTaskQueues(connObj=connObj)
                 return S_ERROR(f"Can't insert values {values} for field {field}: {result['Message']}")
+
+        # Insert RAM requirements if specified and not both zero
+        if "MinRAM" in tqDefDict or "MaxRAM" in tqDefDict:
+            minRAM = tqDefDict.get("MinRAM", 0)
+            maxRAM = tqDefDict.get("MaxRAM", 0)
+            # Only insert if at least one value is non-zero (optimization: avoid unnecessary rows)
+            if minRAM > 0 or maxRAM > 0:
+                cmd = f"INSERT INTO `tq_RAM_requirements` (TQId, MinRAM, MaxRAM) VALUES ({tqId}, {minRAM}, {maxRAM})"
+                result = self._update(cmd, conn=connObj)
+                if not result["OK"]:
+                    self.log.error("Failed to insert RAM requirements", result["Message"])
+                    self.cleanOrphanedTaskQueues(connObj=connObj)
+                    return S_ERROR(f"Can't insert RAM requirements: {result['Message']}")
+
         self.log.info("Created TQ", tqId)
         return S_OK(tqId)

@@ -327,6 +373,13 @@ class TaskQueueDB(DB):
         if not result["OK"]:
             return result

+        # Delete RAM requirements for orphaned TQs
+        result = self._update(
+            f"DELETE FROM `tq_RAM_requirements` WHERE TQId in ( {','.join(orphanedTQs)} )", conn=connObj
+        )
+        if not result["OK"]:
+            return result
+
         result = self._update(f"DELETE FROM `tq_TaskQueues` WHERE TQId in ( {','.join(orphanedTQs)} )", conn=connObj)
         if not result["OK"]:
             return result
@@ -473,6 +526,26 @@ class TaskQueueDB(DB):
                 sqlCondList.append(f"{numValues} = ({secondQuery} {grouping})")
             else:
                 sqlCondList.append(f"`tq_TaskQueues`.TQId not in ( SELECT DISTINCT {tableName}.TQId from {tableName} )")
+
+        # Handle RAM requirements matching
+        hasRAMRequirements = "MinRAM" in tqDefDict or "MaxRAM" in tqDefDict
+        if hasRAMRequirements:
+            minRAM = tqDefDict.get("MinRAM", 0)
+            maxRAM = tqDefDict.get("MaxRAM", 0)
+            # Only match TQs with the same RAM requirements if at least one is non-zero
+            if minRAM > 0 or maxRAM > 0:
+                # Match TQs that have the exact same RAM requirements
+                sqlCondList.append(
+                    f"`tq_TaskQueues`.TQId IN ( SELECT TQId FROM `tq_RAM_requirements` "
+                    f"WHERE MinRAM = {minRAM} AND MaxRAM = {maxRAM} )"
+                )
+            else:
+                # Both are 0, so match TQs with no RAM requirements row
+                sqlCondList.append("`tq_TaskQueues`.TQId NOT IN ( SELECT DISTINCT TQId FROM `tq_RAM_requirements` )")
+        else:
+            # Match TQs that have no RAM requirements
+            sqlCondList.append("`tq_TaskQueues`.TQId NOT IN ( SELECT DISTINCT TQId FROM `tq_RAM_requirements` )")
+
         # END MAGIC: That was easy ;)
         return S_OK(" AND ".join(sqlCondList))

@@ -722,6 +795,19 @@ WHERE `tq_Jobs`.TQId = %s ORDER BY RAND() / `tq_Jobs`.RealPriority ASC LIMIT 1"
         if "CPUTime" in tqMatchDict:
             sqlCondList.append(self.__generateSQLSubCond("tq.%s <= %%s" % "CPUTime", tqMatchDict["CPUTime"]))

+        # RAM matching logic
+        if "MaxRAM" in tqMatchDict:
+            ram = tqMatchDict["MaxRAM"]
+            # Join with tq_RAM_requirements table
+            sqlTables["tq_RAM_requirements"] = "ram_req"
+            # Match if:
+            # 1. No RAM requirement exists for this TQ (LEFT JOIN will give NULL)
+            # 2. OR the resource has at least MinRAM
+            # Note: MinRAM is used for matching, MaxRAM is informational for post-match scheduling
+            # A job requiring MinRAM=2GB can run on any machine with 2GB or more
+            ramCond = f"( ram_req.TQId IS NULL OR {ram} >= ram_req.MinRAM )"
+            sqlCondList.append(ramCond)
+
         tag_fv = []

         # Match multi value fields
@@ -844,10 +930,14 @@ WHERE `tq_Jobs`.TQId = %s ORDER BY RAND() / `tq_Jobs`.RealPriority ASC LIMIT 1"
         if negativeCond:
             sqlCondList.append(self.__generateNotSQL(negativeCond))

-        # Generate the final query string
-
-
-
+        # Generate the final query string with proper JOINs
+        fromClause = "`tq_TaskQueues` tq"
+
+        # Add LEFT JOIN for RAM requirements if needed
+        if "tq_RAM_requirements" in sqlTables:
+            fromClause += " LEFT JOIN `tq_RAM_requirements` ram_req ON tq.TQId = ram_req.TQId"
+
+        tqSqlCmd = f"SELECT tq.TQId, tq.Owner, tq.OwnerGroup FROM {fromClause} WHERE {' AND '.join(sqlCondList)}"

         # Apply priorities
         tqSqlCmd = f"{tqSqlCmd} ORDER BY RAND() / tq.Priority ASC"
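Putting the two hunks above together, when the matching resource reports a MaxRAM value the final query takes roughly the following shape. This is a sketch with invented literal values; in the real code the conditions are built piecewise and values are escaped.

# Sketch only: approximate final matching query once MaxRAM is present in the match dict.
resource_ram_mb = 16000  # invented: RAM reported by the matching resource
sketch_query = (
    "SELECT tq.TQId, tq.Owner, tq.OwnerGroup "
    "FROM `tq_TaskQueues` tq "
    "LEFT JOIN `tq_RAM_requirements` ram_req ON tq.TQId = ram_req.TQId "
    f"WHERE tq.CPUTime <= 86400 AND ( ram_req.TQId IS NULL OR {resource_ram_mb} >= ram_req.MinRAM ) "
    "ORDER BY RAND() / tq.Priority ASC"
)
print(sketch_query)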
@@ -994,6 +1084,12 @@ WHERE j.JobId = %s AND t.TQId = j.TQId"
             retVal = self._update(f"DELETE FROM `tq_TQTo{mvField}` WHERE TQId = {tqId}", conn=connObj)
             if not retVal["OK"]:
                 return retVal
+
+        # Delete RAM requirements if they exist
+        retVal = self._update(f"DELETE FROM `tq_RAM_requirements` WHERE TQId = {tqId}", conn=connObj)
+        if not retVal["OK"]:
+            return retVal
+
         retVal = self._update(f"DELETE FROM `tq_TaskQueues` WHERE TQId = {tqId}", conn=connObj)
         if not retVal["OK"]:
             return retVal
@@ -1065,6 +1161,40 @@ WHERE j.JobId = %s AND t.TQId = j.TQId"
                 if field not in tqData[tqId]:
                     tqData[tqId][field] = []
                 tqData[tqId][field].append(value)
+
+        # Retrieve RAM requirements (if table exists)
+        # Note: The table should be auto-created by __initializeDB, but we check for safety
+        sqlCmd = "SELECT TQId, MinRAM, MaxRAM FROM `tq_RAM_requirements`"
+        if tqIdList is not None:
+            if tqIdList:
+                # Only retrieve RAM requirements for specific TQIds
+                sqlCmd += f" WHERE TQId IN ( {', '.join([str(id_) for id_ in tqIdList])} )"
+            # else: empty list was already handled earlier with fast-track return
+        retVal = self._query(sqlCmd)
+        if not retVal["OK"]:
+            # If table doesn't exist (e.g., old installation), log a warning but continue
+            # This provides backward compatibility
+            if "doesn't exist" in retVal["Message"] or "Table" in retVal["Message"]:
+                self.log.warn("RAM requirements table not found, skipping RAM data retrieval", retVal["Message"])
+            else:
+                self.log.error("Can't retrieve RAM requirements", retVal["Message"])
+                return retVal
+        else:
+            for record in retVal["Value"]:
+                tqId = record[0]
+                minRAM = record[1]
+                maxRAM = record[2]
+                if tqId not in tqData:
+                    if tqIdList is None or tqId in tqIdList:
+                        self.log.verbose(
+                            "Task Queue has RAM requirements but does not exist: triggering a cleaning",
+                            f"TQID: {tqId}",
+                        )
+                        tqNeedCleaning = True
+                else:
+                    tqData[tqId]["MinRAM"] = minRAM
+                    tqData[tqId]["MaxRAM"] = maxRAM
+
         if tqNeedCleaning:
             self.cleanOrphanedTaskQueues()
         return S_OK(tqData)
DIRAC/WorkloadManagementSystem/Executor/JobScheduling.py CHANGED

@@ -1,12 +1,12 @@
-"""
-
+"""The Job Scheduling Executor takes the information gained from all previous
+optimizers and makes a scheduling decision for the jobs.

-
+Subsequent to this jobs are added into a Task Queue and pilot agents can be submitted.

-
-
+All issues preventing the successful resolution of a site candidate are discovered
+here where all information is available.

-
+This Executor will fail affected jobs meaningfully.
 """

 import random
@@ -249,7 +249,7 @@ class JobScheduling(OptimizerExecutor):

         # Get stageSites[0] because it has already been randomized and it's as good as any in stageSites
         stageSite = stageSites[0]
-        self.jobLog.verbose("
+        self.jobLog.verbose("Staging site will be", stageSite)
         stageData = idSites[stageSite]
         # Set as if everything has already been staged
         stageData["disk"] += stageData["tape"]
@@ -351,12 +351,6 @@ class JobScheduling(OptimizerExecutor):
             tagList.append("WholeNode")
             tagList.append("MultiProcessor")

-        # sorting out the RAM (this should be probably coded ~same as number of processors)
-        if "MaxRAM" in jobManifest:
-            maxRAM = jobManifest.getOption("MaxRAM", 0)
-            if maxRAM:
-                tagList.append("%dGB" % maxRAM)
-
         # other tags? Just add them
         if "Tags" in jobManifest:
             tagList.extend(jobManifest.getOption("Tags", []))
@@ -391,7 +385,7 @@ class JobScheduling(OptimizerExecutor):

         # Job multivalue requirement keys are specified as singles in the job descriptions
         # but for backward compatibility can be also plurals
-        for key in ("JobType", "GridRequiredCEs", "GridCE", "Tags"):
+        for key in ("JobType", "GridRequiredCEs", "GridCE", "MinRAM", "MaxRAM", "Tags"):
             reqKey = key
             if key == "JobType":
                 reqKey = "JobTypes"
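With the "<n>GB" tag removed, RAM requests now travel as plain manifest options into the task queue requirements. The snippet below is a toy restatement of that propagation, not DIRAC code; only the key names match the loop above, and the manifest values are invented.

# Toy illustration only: MinRAM/MaxRAM travel from the job manifest into the
# task queue requirements dict instead of being turned into a "<n>GB" tag.
manifest_options = {"JobType": "User", "MinRAM": 2048}  # invented example values (MB)
jobRequirements = {}
for key in ("JobType", "GridRequiredCEs", "GridCE", "MinRAM", "MaxRAM", "Tags"):
    if key in manifest_options:
        jobRequirements[key] = manifest_options[key]
print(jobRequirements)  # {'JobType': 'User', 'MinRAM': 2048}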
DIRAC/WorkloadManagementSystem/Executor/test/Test_Executor.py CHANGED

@@ -1,5 +1,5 @@
-"""
-
+"""pytest(s) for Executors"""
+
 # pylint: disable=protected-access, missing-docstring

 from unittest.mock import MagicMock
@@ -54,9 +54,7 @@ def test__applySiteFilter(sites, banned, expected):
         ({}, []),
         ({"Tag": "bof"}, ["bof"]),
         ({"Tags": "bof, bif"}, ["bof", "bif"]),
-        ({"
-        ({"Tags": "bof, bif", "MaxRAM": 2}, ["bof", "bif", "2GB"]),
-        ({"WholeNode": "yes", "MaxRAM": 2}, ["WholeNode", "MultiProcessor", "2GB"]),
+        ({"WholeNode": "yes"}, ["WholeNode", "MultiProcessor"]),
         ({"NumberOfProcessors": 1}, []),
         ({"NumberOfProcessors": 4}, ["MultiProcessor", "4Processors"]),
         ({"NumberOfProcessors": 4, "MinNumberOfProcessors": 2}, ["MultiProcessor", "4Processors"]),
DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapper.py CHANGED

@@ -546,7 +546,7 @@ class JobWrapper:
             self.__report(status=JobStatus.FAILED, minorStatus=JobMinorStatus.APP_THREAD_FAILED, sendFlag=True)
         applicationErrorStatus = "None reported"
         if payloadStatus:
-            applicationErrorStatus = payloadStatus
+            applicationErrorStatus = str(payloadStatus)
         self.__setJobParam("ApplicationError", applicationErrorStatus, sendFlag=True)

         # This might happen if process() and postProcess() are called on different machines
@@ -1544,7 +1544,7 @@ class JobWrapper:
     #############################################################################
     def __setJobParam(self, name, value, sendFlag=False):
         """Wraps around setJobParameter of JobReport client"""
-        jobParam = self.jobReport.setJobParameter(str(name),
+        jobParam = self.jobReport.setJobParameter(str(name), value, sendFlag)
         if not jobParam["OK"]:
             self.log.warn("Failed setting job parameter", jobParam["Message"])
         if self.jobID:
DIRAC/WorkloadManagementSystem/JobWrapper/test/Test_JobWrapper.py CHANGED

@@ -565,7 +565,7 @@ def test_postProcess_executor_failed_status_defined(setup_job_wrapper, mocker, m
     assert result["OK"]
     assert report_args[-1]["status"] == JobStatus.COMPLETING
     assert report_args[-1]["minorStatus"] == JobMinorStatus.APP_ERRORS
-    assert set_param_args[-3][0][1] == 126
+    assert set_param_args[-3][0][1] == "126"


 def test_postProcess_subprocess_not_complete(setup_job_wrapper, mocker, mock_report_and_set_param):
DIRAC/WorkloadManagementSystem/Service/JobManagerHandler.py CHANGED

@@ -354,7 +354,13 @@ class JobManagerHandlerMixin:
         validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
             jobList, RIGHT_RESCHEDULE
         )
-        res = rescheduleJobs(
+        res = rescheduleJobs(
+            validJobList,
+            source="JobManager",
+            jobDB=self.jobDB,
+            taskQueueDB=self.taskQueueDB,
+            jobLoggingDB=self.jobLoggingDB,
+        )
         if not res["OK"]:
             self.log.error(res["Message"])

DIRAC/WorkloadManagementSystem/Utilities/JobParameters.py CHANGED

@@ -10,7 +10,7 @@ def getMemoryFromProc():
     meminfo = {i.split()[0].rstrip(":"): int(i.split()[1]) for i in open("/proc/meminfo").readlines()}
     maxRAM = meminfo["MemTotal"]
     if maxRAM:
-        return int(maxRAM / 1024)
+        return int(maxRAM / 1024)  # from KB to MB


 def getNumberOfProcessors(siteName=None, gridCE=None, queue=None):
@@ -57,7 +57,7 @@ def getNumberOfProcessors(siteName=None, gridCE=None, queue=None):
         return numberOfProcessors

     # 3) looks in CS for tags
-    gLogger.info(f"Getting
+    gLogger.info(f"Getting tags for {siteName}: {gridCE}: {queue}")
     # Tags of the CE
     tags = fromChar(
         gConfig.getValue(f"/Resources/Sites/{siteName.split('.')[0]}/{siteName}/CEs/{gridCE}/Tag", "")
@@ -201,3 +201,82 @@ def getJobParameters(jobIDs: list[int], parName: str | None, vo: str = "") -> di
         if jobID not in final:
             final[jobID] = parameters[jobID]
     return S_OK(final)
+
+
+def getAvailableRAM(siteName=None, gridCE=None, queue=None):
+    """Gets the available RAM on a certain CE/queue/node (what the pilot administers)
+
+    The siteName/gridCE/queue parameters are normally not necessary.
+
+    Tries to find it in this order:
+    1) from the /Resources/Computing/CEDefaults/MaxRAM (which is what the pilot might fill up)
+    2) if not present looks in CS for "MemoryLimitMB" Queue or CE or site option
+    3) if not present but there's WholeNode tag, look what the WN provides using _getMemoryFromProc()
+    4) return 0
+    """
+
+    # 1) from /Resources/Computing/CEDefaults/MaxRAM
+    gLogger.info("Getting MaxRAM from /Resources/Computing/CEDefaults/MaxRAM")
+    availableRAM = gConfig.getValue("/Resources/Computing/CEDefaults/MaxRAM", None)
+    if availableRAM:
+        return availableRAM
+
+    # 2) looks in CS for "MaxRAM" Queue or CE or site option
+    if not siteName:
+        siteName = gConfig.getValue("/LocalSite/Site", "")
+    if not gridCE:
+        gridCE = gConfig.getValue("/LocalSite/GridCE", "")
+    if not queue:
+        queue = gConfig.getValue("/LocalSite/CEQueue", "")
+    if not (siteName and gridCE and queue):
+        gLogger.warn("Could not find AvailableRAM: missing siteName or gridCE or queue. Returning 0")
+        return 0
+
+    grid = siteName.split(".")[0]
+    csPaths = [
+        f"/Resources/Sites/{grid}/{siteName}/CEs/{gridCE}/Queues/{queue}/MemoryLimitMB",
+        f"/Resources/Sites/{grid}/{siteName}/CEs/{gridCE}/MemoryLimitMB",
+        f"/Resources/Sites/{grid}/{siteName}/MemoryLimitMB",
+    ]
+    for csPath in csPaths:
+        gLogger.info("Looking in", csPath)
+        availableRAM = gConfig.getValue(csPath, None)
+        if availableRAM:
+            return int(availableRAM)
+
+    # 3) checks if 'WholeNode' is one of the used tags
+    # Tags of the CE
+    tags = fromChar(
+        gConfig.getValue(f"/Resources/Sites/{siteName.split('.')[0]}/{siteName}/CEs/{gridCE}/Tag", "")
+    ) + fromChar(gConfig.getValue(f"/Resources/Sites/{siteName.split('.')[0]}/{siteName}/Cloud/{gridCE}/Tag", ""))
+    # Tags of the Queue
+    tags += fromChar(
+        gConfig.getValue(f"/Resources/Sites/{siteName.split('.')[0]}/{siteName}/CEs/{gridCE}/Queues/{queue}/Tag", "")
+    ) + fromChar(
+        gConfig.getValue(f"/Resources/Sites/{siteName.split('.')[0]}/{siteName}/Cloud/{gridCE}/VMTypes/{queue}/Tag", "")
+    )
+
+    if "WholeNode" in tags:
+        gLogger.info("Found WholeNode tag, using getMemoryFromProc()")
+        return getMemoryFromProc()
+
+    # 4) return 0
+    gLogger.info("RAM limits could not be found in CS, and WholeNode tag not found")
+    return 0
+
+
+def getRAMForJob(jobID):
+    """Gets the RAM allowed for the job.
+    This can be used to communicate to your job payload the RAM it's allowed to use,
+    so this function should be called from your extension.
+
+    If the JobAgent is using "InProcess" CE (which is the default),
+    then what's returned will basically be the same of what's returned by the getAvailableRAM() function above
+    """
+
+    # from /Resources/Computing/JobLimits/jobID/MaxRAM (set by PoolComputingElement)
+    ram = gConfig.getValue(f"Resources/Computing/JobLimits/{jobID}/MaxRAM")
+    if ram:
+        return int(ram)
+
+    return getAvailableRAM()
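A minimal usage sketch of the two new helpers, for instance from a VO extension running inside the job environment. The job ID is invented and a configured DIRAC installation is assumed; the module path comes from the file list above.

from DIRAC.WorkloadManagementSystem.Utilities import JobParameters

# RAM the worker node / CE queue can offer, in MB (0 if it cannot be determined)
node_ram = JobParameters.getAvailableRAM()

# RAM budget for one specific job; falls back to getAvailableRAM() when no
# per-job limit was set under /Resources/Computing/JobLimits/<jobID>/MaxRAM
job_ram = JobParameters.getRAMForJob(12345)
print(node_ram, job_ram)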
DIRAC/WorkloadManagementSystem/Utilities/PilotCStoJSONSynchronizer.py CHANGED

@@ -208,12 +208,13 @@ class PilotCStoJSONSynchronizer:
         if defaultSetup:
             pilotDict["DefaultSetup"] = defaultSetup

-
-
-
-
-
-
+        configurationServers = Operations().getValue("Pilot/OverrideConfigurationServers", [])
+        if not configurationServers:
+            self.log.debug("From DIRAC/Configuration")
+            configurationServers = gConfig.getServersList()
+        if not includeMasterCS:
+            masterCS = gConfigurationData.getMasterServer()
+            configurationServers = exclude_master_cs_aliases(configurationServers, masterCS)
         pilotDict["ConfigurationServers"] = configurationServers

         preferredURLPatterns = gConfigurationData.extractOptionFromCFG("/DIRAC/PreferredURLPatterns")
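A toy restatement of the server-selection order introduced here (not DIRAC code; the real exclude_master_cs_aliases also strips aliases of the master CS, which this simplification ignores, and the URLs are invented):

def pick_configuration_servers(override, cs_servers, master_cs, include_master):
    # 1) an explicit Operations override wins; 2) otherwise the configured server list,
    # with the master CS removed unless includeMasterCS is requested
    servers = override or cs_servers
    if not include_master:
        servers = [s for s in servers if s != master_cs]
    return servers

print(pick_configuration_servers([], ["https://cs1.example.org:9135", "https://master.example.org:9135"],
                                 "https://master.example.org:9135", False))
# ['https://cs1.example.org:9135']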
DIRAC/WorkloadManagementSystem/Utilities/QueueUtilities.py CHANGED

@@ -1,5 +1,5 @@
-"""Utilities to help Computing Element Queues manipulation
-
+"""Utilities to help Computing Element Queues manipulation"""
+
 import hashlib

 from DIRAC import S_OK, S_ERROR
@@ -222,10 +222,10 @@ def matchQueue(jobJDL, queueDict, fullMatch=False):
             return S_OK({"Match": False, "Reason": noMatchReasons[0]})

     # 5. RAM
-    ram = job.getAttributeInt("
+    ram = job.getAttributeInt("MaxRAM")
     # If MaxRAM is not specified in the queue description, assume 2GB
-    if ram and ram > int(queueDict.get("MaxRAM", 2048)
-        noMatchReasons.append("Job RAM
+    if ram and ram > int(queueDict.get("MaxRAM", 2048)):
+        noMatchReasons.append(f"Job RAM {ram} requirement not satisfied")
         if not fullMatch:
             return S_OK({"Match": False, "Reason": noMatchReasons[0]})

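For orientation, the repaired check now behaves roughly as follows (the numbers are invented, in MB; the 2048 default comes from the comment in the hunk above):

# Toy illustration of the RAM check in matchQueue:
job_max_ram = 4096                                          # MaxRAM requested by the job
queue_max_ram = int({"MaxRAM": 3000}.get("MaxRAM", 2048))   # queue description; 2048 assumed if absent
if job_max_ram and job_max_ram > queue_max_ram:
    print(f"Job RAM {job_max_ram} requirement not satisfied")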
DIRAC/WorkloadManagementSystem/Utilities/RemoteRunner.py CHANGED

@@ -1,4 +1,4 @@
-"""
+"""RemoteRunner

 RemoteRunner has been designed to send scripts/applications and input files on remote worker nodes having
 no outbound connectivity (e.g. supercomputers)
@@ -6,6 +6,7 @@ no outbound connectivity (e.g. supercomputers)
 Mostly called by workflow modules, RemoteRunner is generally the last component to get through before
 the script/application execution on a remote machine.
 """
+
 import hashlib
 import os
 import shlex
DIRAC/WorkloadManagementSystem/Utilities/Utils.py CHANGED

@@ -118,12 +118,21 @@ def createJobWrapper(
     return S_OK(generatedFiles)


-def rescheduleJobs(
+def rescheduleJobs(
+    jobIDs: list[int],
+    source: str = "",
+    jobDB: JobDB | None = None,
+    taskQueueDB: TaskQueueDB | None = None,
+    jobLoggingDB: JobLoggingDB | None = None,
+) -> dict:
     """Utility to reschedule jobs (not atomic, nor bulk)
     Requires direct access to the JobDB and TaskQueueDB

     :param jobIDs: list of jobIDs
     :param source: source of the reschedule
+    :param jobDB: optional JobDB instance to reuse (creates new if not provided)
+    :param taskQueueDB: optional TaskQueueDB instance to reuse (creates new if not provided)
+    :param jobLoggingDB: optional JobLoggingDB instance to reuse (creates new if not provided)
     :return: S_OK/S_ERROR
     :rtype: dict

@@ -131,13 +140,21 @@ def rescheduleJobs(jobIDs: list[int], source: str = "") -> dict:

     failedJobs = []

+    # Reuse provided DB instances or create new ones
+    if jobDB is None:
+        jobDB = JobDB()
+    if taskQueueDB is None:
+        taskQueueDB = TaskQueueDB()
+    if jobLoggingDB is None:
+        jobLoggingDB = JobLoggingDB()
+
     for jobID in jobIDs:
-        result =
+        result = jobDB.rescheduleJob(jobID)
         if not result["OK"]:
             failedJobs.append(jobID)
             continue
-
-
+        taskQueueDB.deleteJob(jobID)
+        jobLoggingDB.addLoggingRecord(
             result["JobID"],
             status=result["Status"],
             minorStatus=result["MinorStatus"],
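A minimal usage sketch of the extended signature. The job IDs are invented, and a configured DIRAC installation with direct database access is assumed, as the docstring requires; the import paths come from the file list above.

from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import TaskQueueDB
from DIRAC.WorkloadManagementSystem.Utilities.Utils import rescheduleJobs

# Passing already-open handles avoids re-instantiating the DB objects on every call,
# which is what the JobManagerHandler change above now does with its own instances.
jobDB, tqDB, logDB = JobDB(), TaskQueueDB(), JobLoggingDB()
res = rescheduleJobs([1234, 1235], source="maintenance", jobDB=jobDB, taskQueueDB=tqDB, jobLoggingDB=logDB)
if not res["OK"]:
    print(res["Message"])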
DIRAC/WorkloadManagementSystem/Utilities/test/Test_RemoteRunner.py CHANGED

@@ -1,5 +1,4 @@
-"""
-"""
+"""Test class for Job Agent"""

 # imports
 import pytest
@@ -60,11 +59,16 @@ def test__wrapCommand(command, workingDirectory, expectedContent):
         (1, 1, True, 1),
         (2, 2, True, 2),
         (1, 2, True, 1),
+        (
+            1,
+            0,
+            True,
+            1,
+        ),  # if ceNumberOfProcessors is 0, it will be interpreted as needing local evaluation. That will return 1.
         # CE has less processors than the payload requests
         (2, 1, False, "Not enough processors to execute the command"),
         # Specific case: we should not have 0
         (0, 1, False, "Inappropriate NumberOfProcessors value"),
-        (1, 0, False, "Inappropriate NumberOfProcessors value"),
         (-4, 1, False, "Inappropriate NumberOfProcessors value"),
         (1, -4, False, "Inappropriate NumberOfProcessors value"),
         (0, 0, False, "Inappropriate NumberOfProcessors value"),
DIRAC/WorkloadManagementSystem/scripts/dirac_wms_get_wn_parameters.py CHANGED

@@ -39,12 +39,12 @@ def main():
     gLogger.info("Getting number of processors")
     numberOfProcessor = JobParameters.getNumberOfProcessors(Site, ceName, Queue)

-    gLogger.info("Getting memory (RAM)")
-    maxRAM = JobParameters.getMemoryFromProc()
-
     gLogger.info("Getting number of GPUs")
     numberOfGPUs = JobParameters.getNumberOfGPUs(Site, ceName, Queue)

+    gLogger.info("Getting maximum RAM")
+    maxRAM = JobParameters.getAvailableRAM(Site, ceName, Queue)
+
     # just communicating it back
     gLogger.notice(" ".join(str(wnPar) for wnPar in [numberOfProcessor, maxRAM, numberOfGPUs]))

DIRAC/__init__.py CHANGED

@@ -139,7 +139,7 @@ def _computeRootPath(rootPath):
     if versionsPath.parent.name != "versions":
         return str(rootPath)
     # VERSION-INSTALL_TIME
-    pattern1 = re.compile(r"v
+    pattern1 = re.compile(r"(v\d+\.\d+\.\d+[^\-]*|[^-]+)-(\d+)")
     # $(uname -s)-$(uname -m)
     pattern2 = re.compile(r"([^\-]+)-([^\-]+)")
     if pattern1.fullmatch(versionsPath.name) and pattern2.fullmatch(rootPath.name):