PyPI - diracx-db - Versions diffs - 0.0.1a19__py3-none-any.whl → 0.0.1a21__py3-none-any.whl - Mend

diracx-db 0.0.1a19__py3-none-any.whl → 0.0.1a21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

diracx/db/sql/job/db.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from __future__ import annotations
-import logging
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any
@@ -9,14 +8,9 @@ from sqlalchemy.exc import IntegrityError, NoResultFound
 if TYPE_CHECKING:
     from sqlalchemy.sql.elements import BindParameter
 from diracx.core.exceptions import InvalidQueryError, JobNotFound
 from diracx.core.models import (
-    JobMinorStatus,
-    JobStatus,
     LimitedJobStatusReturn,
-    ScalarSearchOperator,
-    ScalarSearchSpec,
     SearchSpec,
     SortSpec,
 )
@@ -50,11 +44,6 @@ class JobDB(BaseSQLDB):
     # to find a way to make it dynamic
     jdl2DBParameters = ["JobName", "JobType", "JobGroup"]
-    # TODO: set maxRescheduling value from CS
-    # maxRescheduling = self.getCSOption("MaxRescheduling", 3)
-    # For now:
-    maxRescheduling = 3
     async def summary(self, group_by, search) -> list[dict[str, str | int]]:
         columns = _get_columns(Jobs.__table__, group_by)
@@ -81,6 +70,7 @@ class JobDB(BaseSQLDB):
     ) -> tuple[int, list[dict[Any, Any]]]:
         # Find which columns to select
         columns = _get_columns(Jobs.__table__, parameters)
         stmt = select(*columns)
         stmt = apply_search_filters(Jobs.__table__.columns.__getitem__, stmt, search)
@@ -107,23 +97,18 @@ class JobDB(BaseSQLDB):
             dict(row._mapping) async for row in (await self.conn.stream(stmt))
         ]
-    async def _insertNewJDL(self, jdl) -> int:
-        from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import compressJDL
-        stmt = insert(JobJDLs).values(
-            JDL="", JobRequirements="", OriginalJDL=compressJDL(jdl)
+    async def insert_input_data(self, lfns: dict[int, list[str]]):
+        await self.conn.execute(
+            InputData.__table__.insert(),
+            [
+                {
+                    "JobID": job_id,
+                    "LFN": lfn,
+                }
+                for job_id, lfns_ in lfns.items()
+                for lfn in lfns_
+            ],
         )
-        result = await self.conn.execute(stmt)
-        # await self.engine.commit()
-        return result.lastrowid
-    async def _insertJob(self, jobData: dict[str, Any]):
-        stmt = insert(Jobs).values(jobData)
-        await self.conn.execute(stmt)
-    async def _insertInputData(self, job_id: int, lfns: list[str]):
-        stmt = insert(InputData).values([{"JobID": job_id, "LFN": lfn} for lfn in lfns])
-        await self.conn.execute(stmt)
     async def setJobAttributes(self, job_id, jobData):
         """TODO: add myDate and force parameters."""
@@ -132,7 +117,49 @@ class JobDB(BaseSQLDB):
         stmt = update(Jobs).where(Jobs.JobID == job_id).values(jobData)
         await self.conn.execute(stmt)
-    async def _checkAndPrepareJob(
+    async def create_job(self, original_jdl):
+        """Insert a new JobJDLs row holding the compressed original JDL.
+        The JDL and JobRequirements columns start out as empty strings.
+        Returns the ``lastrowid`` reported for the insert, i.e. the new job id.
+        """
+        from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import compressJDL
+        insert_stmt = JobJDLs.__table__.insert().values(
+            JDL="",
+            JobRequirements="",
+            OriginalJDL=compressJDL(original_jdl),
+        )
+        outcome = await self.conn.execute(insert_stmt)
+        return outcome.lastrowid
+    async def insert_job_attributes(self, jobs_to_update: dict[int, dict]):
+        """Bulk insert one Jobs row per entry of ``jobs_to_update``.
+        Each key is used as the row's JobID and is merged with its attribute dict.
+        """
+        rows = [{"JobID": job_id, **attrs} for job_id, attrs in jobs_to_update.items()]
+        await self.conn.execute(Jobs.__table__.insert(), rows)
+    async def update_job_jdls(self, jdls_to_update: dict[int, str]):
+        """Store the compressed JDL of every job in ``jdls_to_update`` (job id -> JDL text).
+        Typically used just after inserting the original JDL, or when rescheduling.
+        """
+        from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import compressJDL
+        jobjdls = JobJDLs.__table__
+        # The WHERE value is bound as "b_JobID" rather than "JobID", presumably to keep it
+        # distinct from the column's own parameter name -- confirm against the SQLAlchemy docs.
+        update_stmt = jobjdls.update().where(jobjdls.c.JobID == bindparam("b_JobID"))
+        params = [
+            {"b_JobID": job_id, "JDL": compressJDL(jdl)}
+            for job_id, jdl in jdls_to_update.items()
+        ]
+        await self.conn.execute(update_stmt, params)
+    async def checkAndPrepareJob(
         self,
         jobID,
         class_ad_job,
@@ -175,6 +202,31 @@ class JobDB(BaseSQLDB):
         )
         await self.conn.execute(stmt)
+    async def setJobJDLsBulk(self, jdls):
+        """Bulk-update the JDL column of JobJDLs; ``jdls`` maps job id to JDL text."""
+        from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import compressJDL
+        table = JobJDLs.__table__
+        stmt = table.update().where(table.c.JobID == bindparam("b_JobID"))
+        # One parameter set per job, each JDL compressed before it is stored
+        param_sets = []
+        for jid, jdl in jdls.items():
+            param_sets.append({"b_JobID": jid, "JDL": compressJDL(jdl)})
+        await self.conn.execute(stmt, param_sets)
+    async def setJobAttributesBulk(self, jobData):
+        """Bulk-update Jobs rows; ``jobData`` maps job id to a dict of column values.
+        Entries that contain "Status" also get "LastUpdateTime" set to the current
+        UTC time; this is written into the dicts passed in by the caller.
+        TODO: add myDate and force parameters.
+        """
+        for attrs in jobData.values():
+            if "Status" in attrs:
+                attrs["LastUpdateTime"] = datetime.now(tz=timezone.utc)
+        jobs = Jobs.__table__
+        stmt = jobs.update().where(jobs.c.JobID == bindparam("b_JobID"))
+        await self.conn.execute(
+            stmt,
+            [{"b_JobID": job_id, **attrs} for job_id, attrs in jobData.items()],
+        )
     async def getJobJDL(self, job_id: int, original: bool = False) -> str:
         from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import extractJDL
@@ -189,243 +241,21 @@ class JobDB(BaseSQLDB):
         return jdl
-    async def insert(
-        self,
-        jdl,
-        owner,
-        owner_group,
-        initial_status,
-        initial_minor_status,
-        vo,
-    ):
-        from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
-        from DIRAC.Core.Utilities.ReturnValues import returnValueOrRaise
-        from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import (
-            checkAndAddOwner,
-            createJDLWithInitialStatus,
-            fixJDL,
-        )
-        job_attrs = {
-            "LastUpdateTime": datetime.now(tz=timezone.utc),
-            "SubmissionTime": datetime.now(tz=timezone.utc),
-            "Owner": owner,
-            "OwnerGroup": owner_group,
-            "VO": vo,
-        }
-        jobManifest = returnValueOrRaise(checkAndAddOwner(jdl, owner, owner_group))
-        jdl = fixJDL(jdl)
-        job_id = await self._insertNewJDL(jdl)
-        jobManifest.setOption("JobID", job_id)
-        job_attrs["JobID"] = job_id
-        # 2.- Check JDL and Prepare DIRAC JDL
-        jobJDL = jobManifest.dumpAsJDL()
-        # Replace the JobID placeholder if any
-        if jobJDL.find("%j") != -1:
-            jobJDL = jobJDL.replace("%j", str(job_id))
-        class_ad_job = ClassAd(jobJDL)
-        class_ad_req = ClassAd("[]")
-        if not class_ad_job.isOK():
-            job_attrs["Status"] = JobStatus.FAILED
-            job_attrs["MinorStatus"] = "Error in JDL syntax"
-            await self._insertJob(job_attrs)
-            return {
-                "JobID": job_id,
-                "Status": JobStatus.FAILED,
-                "MinorStatus": "Error in JDL syntax",
-            }
-        class_ad_job.insertAttributeInt("JobID", job_id)
-        await self._checkAndPrepareJob(
-            job_id,
-            class_ad_job,
-            class_ad_req,
-            owner,
-            owner_group,
-            job_attrs,
-            vo,
-        )
-        jobJDL = createJDLWithInitialStatus(
-            class_ad_job,
-            class_ad_req,
-            self.jdl2DBParameters,
-            job_attrs,
-            initial_status,
-            initial_minor_status,
-            modern=True,
-        )
-        await self.setJobJDL(job_id, jobJDL)
-        # Adding the job in the Jobs table
-        await self._insertJob(job_attrs)
-        # TODO: check if that is actually true
-        if class_ad_job.lookupAttribute("Parameters"):
-            raise NotImplementedError("Parameters in the JDL are not supported")
-        # Looking for the Input Data
-        inputData = []
-        if class_ad_job.lookupAttribute("InputData"):
-            inputData = class_ad_job.getListFromExpression("InputData")
-            lfns = [lfn for lfn in inputData if lfn]
-            if lfns:
-                await self._insertInputData(job_id, lfns)
-        return {
-            "JobID": job_id,
-            "Status": initial_status,
-            "MinorStatus": initial_minor_status,
-            "TimeStamp": datetime.now(tz=timezone.utc),
-        }
-    async def rescheduleJob(self, job_id) -> dict[str, Any]:
-        """Reschedule given job."""
-        from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
-        from DIRAC.Core.Utilities.ReturnValues import SErrorException
-        _, result = await self.search(
-            parameters=[
-                "Status",
-                "MinorStatus",
-                "VerifiedFlag",
-                "RescheduleCounter",
-                "Owner",
-                "OwnerGroup",
-            ],
-            search=[
-                ScalarSearchSpec(
-                    parameter="JobID", operator=ScalarSearchOperator.EQUAL, value=job_id
-                )
-            ],
-            sorts=[],
-        )
-        if not result:
-            raise ValueError(f"Job {job_id} not found.")
-        jobAttrs = result[0]
-        if "VerifiedFlag" not in jobAttrs:
-            raise ValueError(f"Job {job_id} not found in the system")
-        if not jobAttrs["VerifiedFlag"]:
-            raise ValueError(
-                f"Job {job_id} not Verified: Status {jobAttrs['Status']}, Minor Status: {jobAttrs['MinorStatus']}"
-            )
-        reschedule_counter = int(jobAttrs["RescheduleCounter"]) + 1
-        # TODO: update maxRescheduling:
-        # self.maxRescheduling = self.getCSOption("MaxRescheduling", self.maxRescheduling)
-        if reschedule_counter > self.maxRescheduling:
-            logging.warn(f"Job {job_id}: Maximum number of reschedulings is reached.")
-            self.setJobAttributes(
-                job_id,
-                {
-                    "Status": JobStatus.FAILED,
-                    "MinorStatus": JobMinorStatus.MAX_RESCHEDULING,
-                },
-            )
-            raise ValueError(
-                f"Maximum number of reschedulings is reached: {self.maxRescheduling}"
-            )
-        new_job_attributes = {"RescheduleCounter": reschedule_counter}
-        # TODO: get the job parameters from JobMonitoringClient
-        # result = JobMonitoringClient().getJobParameters(jobID)
-        # if result["OK"]:
-        #     parDict = result["Value"]
-        #     for key, value in parDict.get(jobID, {}).items():
-        #         result = self.setAtticJobParameter(jobID, key, value, rescheduleCounter - 1)
-        #         if not result["OK"]:
-        #             break
-        # TODO: IF we keep JobParameters and OptimizerParameters: Delete job in those tables.
-        # await self.delete_job_parameters(job_id)
-        # await self.delete_job_optimizer_parameters(job_id)
-        job_jdl = await self.getJobJDL(job_id, original=True)
-        if not job_jdl.strip().startswith("["):
-            job_jdl = f"[{job_jdl}]"
-        classAdJob = ClassAd(job_jdl)
-        classAdReq = ClassAd("[]")
-        retVal = {}
-        retVal["JobID"] = job_id
-        classAdJob.insertAttributeInt("JobID", job_id)
+    async def getJobJDLs(self, job_ids, original: bool = False) -> dict[int | str, str]:
+        from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import extractJDL
-        try:
-            result = await self._checkAndPrepareJob(
-                job_id,
-                classAdJob,
-                classAdReq,
-                jobAttrs["Owner"],
-                jobAttrs["OwnerGroup"],
-                new_job_attributes,
-                classAdJob.getAttributeString("VirtualOrganization"),
+        if original:
+            stmt = select(JobJDLs.JobID, JobJDLs.OriginalJDL).where(
+                JobJDLs.JobID.in_(job_ids)
             )
-        except SErrorException as e:
-            raise ValueError(e) from e
-        priority = classAdJob.getAttributeInt("Priority")
-        if priority is None:
-            priority = 0
-        jobAttrs["UserPriority"] = priority
-        siteList = classAdJob.getListFromExpression("Site")
-        if not siteList:
-            site = "ANY"
-        elif len(siteList) > 1:
-            site = "Multiple"
         else:
-            site = siteList[0]
-        jobAttrs["Site"] = site
-        jobAttrs["Status"] = JobStatus.RECEIVED
-        jobAttrs["MinorStatus"] = JobMinorStatus.RESCHEDULED
-        jobAttrs["ApplicationStatus"] = "Unknown"
+            stmt = select(JobJDLs.JobID, JobJDLs.JDL).where(JobJDLs.JobID.in_(job_ids))
-        jobAttrs["LastUpdateTime"] = datetime.now(tz=timezone.utc)
-        jobAttrs["RescheduleTime"] = datetime.now(tz=timezone.utc)
-        reqJDL = classAdReq.asJDL()
-        classAdJob.insertAttributeInt("JobRequirements", reqJDL)
-        jobJDL = classAdJob.asJDL()
-        # Replace the JobID placeholder if any
-        jobJDL = jobJDL.replace("%j", str(job_id))
-        result = await self.setJobJDL(job_id, jobJDL)
-        result = await self.setJobAttributes(job_id, jobAttrs)
-        retVal["InputData"] = classAdJob.lookupAttribute("InputData")
-        retVal["RescheduleCounter"] = reschedule_counter
-        retVal["Status"] = JobStatus.RECEIVED
-        retVal["MinorStatus"] = JobMinorStatus.RESCHEDULED
-        return retVal
+        return {
+            jobid: extractJDL(jdl)
+            for jobid, jdl in (await self.conn.execute(stmt))
+            if jdl
+        }
     async def get_job_status(self, job_id: int) -> LimitedJobStatusReturn:
         try:
@@ -451,6 +281,22 @@ class JobDB(BaseSQLDB):
         except IntegrityError as e:
             raise JobNotFound(job_id) from e
+    async def set_job_command_bulk(self, commands):
+        """Store commands to be passed to the jobs together with the next heart beat.
+        ``commands`` is an iterable of (job_id, command, arguments) triples; one
+        JobCommands row is inserted per triple, stamped with the current UTC time.
+        """
+        # execute() is a coroutine: without the await it is never run and nothing is inserted
+        await self.conn.execute(
+            insert(JobCommands),
+            [
+                {
+                    "JobID": job_id,
+                    "Command": command,
+                    "Arguments": arguments,
+                    "ReceptionTime": datetime.now(tz=timezone.utc),
+                }
+                for job_id, command, arguments in commands
+            ],
+        )
+        # FIXME handle IntegrityError
     async def delete_jobs(self, job_ids: list[int]):
         """Delete jobs from the database."""
         stmt = delete(JobJDLs).where(JobJDLs.JobID.in_(job_ids))

diracx/db/sql/job_logging/db.py CHANGED Viewed

@@ -4,11 +4,14 @@ import time
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING
+from pydantic import BaseModel
 from sqlalchemy import delete, func, insert, select
 if TYPE_CHECKING:
     pass
+from collections import defaultdict
 from diracx.core.exceptions import JobNotFound
 from diracx.core.models import (
     JobStatus,
@@ -24,6 +27,15 @@ from .schema import (
 MAGIC_EPOC_NUMBER = 1270000000
+class JobLoggingRecord(BaseModel):  # one entry consumed by JobLoggingDB.bulk_insert_record
+    job_id: int  # written to the JobID column
+    status: JobStatus  # written to the Status column
+    minor_status: str  # written to the MinorStatus column
+    application_status: str  # written to ApplicationStatus, truncated to 255 characters
+    date: datetime  # written to StatusTime and converted into the StatusTimeOrder value
+    source: str  # written to Source, truncated to 32 characters
 class JobLoggingDB(BaseSQLDB):
     """Frontend for the JobLoggingDB. Provides the ability to store changes with timestamps."""
@@ -69,6 +81,49 @@ class JobLoggingDB(BaseSQLDB):
         )
         await self.conn.execute(stmt)
+    async def bulk_insert_record(
+        self,
+        records: list[JobLoggingRecord],
+    ):
+        """Bulk insert entries to the JobLoggingDB table.
+        Sequence numbers continue after the highest SeqNum already stored for each job
+        (starting at 1 for a job without any record) and increase in input order, so
+        several records for the same job can be inserted in a single call.
+        Does nothing when ``records`` is empty.
+        """
+        if not records:
+            # Nothing to insert; do not execute the INSERT with an empty parameter list
+            return
+        def get_epoc(date):
+            # Seconds (with microsecond fraction) relative to MAGIC_EPOC_NUMBER
+            return (
+                time.mktime(date.timetuple())
+                + date.microsecond / 1000000.0
+                - MAGIC_EPOC_NUMBER
+            )
+        # First, fetch the next free SeqNum for each of the given job_ids (each id listed once)
+        job_ids = list({record.job_id for record in records})
+        seqnum_stmt = (
+            select(
+                LoggingInfo.JobID, func.coalesce(func.max(LoggingInfo.SeqNum) + 1, 1)
+            )
+            .where(LoggingInfo.JobID.in_(job_ids))
+            .group_by(LoggingInfo.JobID)
+        )
+        next_seqnum = {
+            jid: seqnum for jid, seqnum in (await self.conn.execute(seqnum_stmt))
+        }
+        rows = []
+        for record in records:
+            # IF a seqnum is not found, then assume it does not exist and the first sequence number is 1.
+            seqnum = next_seqnum.get(record.job_id, 1)
+            # Reserve the following number so a later record of the same job does not reuse this one
+            next_seqnum[record.job_id] = seqnum + 1
+            rows.append(
+                {
+                    "JobID": record.job_id,
+                    "SeqNum": seqnum,
+                    "Status": record.status,
+                    "MinorStatus": record.minor_status,
+                    "ApplicationStatus": record.application_status[:255],
+                    "StatusTime": record.date,
+                    "StatusTimeOrder": get_epoc(record.date),
+                    "Source": record.source[:32],
+                }
+            )
+        # https://docs.sqlalchemy.org/en/20/orm/queryguide/dml.html#orm-bulk-insert-statements
+        await self.conn.execute(insert(LoggingInfo), rows)
     async def get_records(self, job_id: int) -> list[JobStatusReturn]:
         """Returns a Status,MinorStatus,ApplicationStatus,StatusTime,Source tuple
         for each record found for job specified by its jobID in historical order.
@@ -159,3 +214,22 @@ class JobLoggingDB(BaseSQLDB):
             result[event] = str(etime + MAGIC_EPOC_NUMBER)
         return result
+    async def get_wms_time_stamps_bulk(self, job_ids):
+        """Get TimeStamps for job MajorState transitions for multiple jobs at once.
+        Returns a {JobID: {State: timestamp}} dictionary, where each timestamp is the
+        stored StatusTimeOrder plus MAGIC_EPOC_NUMBER rendered as a string. An empty
+        dictionary is returned when no logging record matches ``job_ids``.
+        """
+        result = defaultdict(dict)
+        stmt = select(
+            LoggingInfo.JobID,
+            LoggingInfo.Status,
+            LoggingInfo.StatusTimeOrder,
+        ).where(LoggingInfo.JobID.in_(job_ids))
+        rows = await self.conn.execute(stmt)
+        # Emptiness is judged from the rows actually returned: rowcount is not a
+        # dependable row counter for SELECT statements on every driver.
+        for job_id, event, etime in rows:
+            result[job_id][event] = str(etime + MAGIC_EPOC_NUMBER)
+        # A plain empty dict is returned when nothing matched
+        return result if result else {}

diracx/db/sql/utils/__init__.py CHANGED Viewed

@@ -16,7 +16,7 @@ from typing import TYPE_CHECKING, Self, cast
 import sqlalchemy.types as types
 from pydantic import TypeAdapter
 from sqlalchemy import Column as RawColumn
-from sqlalchemy import DateTime, Enum, MetaData, select
+from sqlalchemy import DateTime, Enum, MetaData, func, select
 from sqlalchemy.exc import OperationalError
 from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine, create_async_engine
 from sqlalchemy.ext.compiler import compiles
@@ -100,7 +100,9 @@ def mysql_date_trunc(element, compiler, **kw):
         "MONTH": "%Y-%m",
         "YEAR": "%Y",
     }[element._time_resolution]
-    return f"DATE_FORMAT({compiler.process(element.clauses)}, '{pattern}')"
+    (dt_col,) = list(element.clauses)
+    return compiler.process(func.date_format(dt_col, pattern))
 @compiles(date_trunc, "sqlite")
@@ -113,7 +115,13 @@ def sqlite_date_trunc(element, compiler, **kw):
         "MONTH": "%Y-%m",
         "YEAR": "%Y",
     }[element._time_resolution]
-    return f"strftime('{pattern}', {compiler.process(element.clauses)})"
+    (dt_col,) = list(element.clauses)
+    return compiler.process(
+        func.strftime(
+            pattern,
+            dt_col,
+        )
+    )
 def substract_date(**kwargs: float) -> datetime:

diracx-db 0.0.1a19__py3-none-any.whl → 0.0.1a21__py3-none-any.whl

diracx-db 0.0.1a19__py3-none-any.whl → 0.0.1a21__py3-none-any.whl