diracx-db 0.0.1a21__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of diracx-db might be problematic. Click here for more details.

@@ -1,39 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
- import time
3
+ from collections import defaultdict
4
4
  from datetime import datetime, timezone
5
- from typing import TYPE_CHECKING
6
-
7
- from pydantic import BaseModel
8
- from sqlalchemy import delete, func, insert, select
5
+ from typing import Iterable
9
6
 
10
- if TYPE_CHECKING:
11
- pass
7
+ from sqlalchemy import delete, func, select
12
8
 
13
- from collections import defaultdict
14
-
15
- from diracx.core.exceptions import JobNotFound
16
- from diracx.core.models import (
17
- JobStatus,
18
- JobStatusReturn,
19
- )
9
+ from diracx.core.models import JobLoggingRecord, JobStatusReturn
20
10
 
21
11
  from ..utils import BaseSQLDB
22
- from .schema import (
23
- JobLoggingDBBase,
24
- LoggingInfo,
25
- )
26
-
27
- MAGIC_EPOC_NUMBER = 1270000000
28
-
29
-
30
- class JobLoggingRecord(BaseModel):
31
- job_id: int
32
- status: JobStatus
33
- minor_status: str
34
- application_status: str
35
- date: datetime
36
- source: str
12
+ from .schema import JobLoggingDBBase, LoggingInfo
37
13
 
38
14
 
39
15
  class JobLoggingDB(BaseSQLDB):
@@ -41,115 +17,79 @@ class JobLoggingDB(BaseSQLDB):
41
17
 
42
18
  metadata = JobLoggingDBBase.metadata
43
19
 
44
- async def insert_record(
45
- self,
46
- job_id: int,
47
- status: JobStatus,
48
- minor_status: str,
49
- application_status: str,
50
- date: datetime,
51
- source: str,
52
- ):
53
- """Add a new entry to the JobLoggingDB table. One, two or all the three status
54
- components (status, minorStatus, applicationStatus) can be specified.
55
- Optionally the time stamp of the status can
56
- be provided in a form of a string in a format '%Y-%m-%d %H:%M:%S' or
57
- as datetime.datetime object. If the time stamp is not provided the current
58
- UTC time is used.
59
- """
60
- # First, fetch the maximum SeqNum for the given job_id
61
- seqnum_stmt = select(func.coalesce(func.max(LoggingInfo.SeqNum) + 1, 1)).where(
62
- LoggingInfo.JobID == job_id
63
- )
64
- seqnum = await self.conn.scalar(seqnum_stmt)
65
-
66
- epoc = (
67
- time.mktime(date.timetuple())
68
- + date.microsecond / 1000000.0
69
- - MAGIC_EPOC_NUMBER
70
- )
71
-
72
- stmt = insert(LoggingInfo).values(
73
- JobID=int(job_id),
74
- SeqNum=seqnum,
75
- Status=status,
76
- MinorStatus=minor_status,
77
- ApplicationStatus=application_status[:255],
78
- StatusTime=date,
79
- StatusTimeOrder=epoc,
80
- Source=source[:32],
81
- )
82
- await self.conn.execute(stmt)
83
-
84
- async def bulk_insert_record(
20
+ async def insert_records(
85
21
  self,
86
22
  records: list[JobLoggingRecord],
87
23
  ):
88
24
  """Bulk insert entries to the JobLoggingDB table."""
89
-
90
- def get_epoc(date):
91
- return (
92
- time.mktime(date.timetuple())
93
- + date.microsecond / 1000000.0
94
- - MAGIC_EPOC_NUMBER
95
- )
96
-
97
25
  # First, fetch the maximum SeqNums for the given job_ids
98
26
  seqnum_stmt = (
99
27
  select(
100
- LoggingInfo.JobID, func.coalesce(func.max(LoggingInfo.SeqNum) + 1, 1)
28
+ LoggingInfo.job_id, func.coalesce(func.max(LoggingInfo.seq_num) + 1, 1)
101
29
  )
102
- .where(LoggingInfo.JobID.in_([record.job_id for record in records]))
103
- .group_by(LoggingInfo.JobID)
30
+ .where(LoggingInfo.job_id.in_([record.job_id for record in records]))
31
+ .group_by(LoggingInfo.job_id)
104
32
  )
105
33
 
106
- seqnum = {jid: seqnum for jid, seqnum in (await self.conn.execute(seqnum_stmt))}
34
+ seqnums = {
35
+ jid: seqnum for jid, seqnum in (await self.conn.execute(seqnum_stmt))
36
+ }
107
37
  # If a seqnum is not found, then assume it does not exist and the first sequence number is 1.
108
-
109
38
  # https://docs.sqlalchemy.org/en/20/orm/queryguide/dml.html#orm-bulk-insert-statements
110
- await self.conn.execute(
111
- insert(LoggingInfo),
112
- [
39
+ values = []
40
+ for record in records:
41
+ if record.job_id not in seqnums:
42
+ seqnums[record.job_id] = 1
43
+
44
+ values.append(
113
45
  {
114
46
  "JobID": record.job_id,
115
- "SeqNum": seqnum.get(record.job_id, 1),
47
+ "SeqNum": seqnums[record.job_id],
116
48
  "Status": record.status,
117
49
  "MinorStatus": record.minor_status,
118
50
  "ApplicationStatus": record.application_status[:255],
119
51
  "StatusTime": record.date,
120
- "StatusTimeOrder": get_epoc(record.date),
121
- "Source": record.source[:32],
52
+ "StatusTimeOrder": record.date,
53
+ "StatusSource": record.source[:32],
122
54
  }
123
- for record in records
124
- ],
55
+ )
56
+ seqnums[record.job_id] = seqnums[record.job_id] + 1
57
+
58
+ await self.conn.execute(
59
+ LoggingInfo.__table__.insert(),
60
+ values,
125
61
  )
126
62
 
127
- async def get_records(self, job_id: int) -> list[JobStatusReturn]:
63
+ async def get_records(self, job_ids: list[int]) -> dict[int, JobStatusReturn]:
128
64
  """Returns a Status,MinorStatus,ApplicationStatus,StatusTime,Source tuple
129
65
  for each record found for the job specified by its jobID, in historical order.
130
66
  """
67
+ # We could potentially use a group_by here, but we need to post-process the
68
+ # results later.
131
69
  stmt = (
132
70
  select(
133
- LoggingInfo.Status,
134
- LoggingInfo.MinorStatus,
135
- LoggingInfo.ApplicationStatus,
136
- LoggingInfo.StatusTime,
137
- LoggingInfo.Source,
71
+ LoggingInfo.job_id,
72
+ LoggingInfo.status,
73
+ LoggingInfo.minor_status,
74
+ LoggingInfo.application_status,
75
+ LoggingInfo.status_time,
76
+ LoggingInfo.source,
138
77
  )
139
- .where(LoggingInfo.JobID == int(job_id))
140
- .order_by(LoggingInfo.StatusTimeOrder, LoggingInfo.StatusTime)
78
+ .where(LoggingInfo.job_id.in_(job_ids))
79
+ .order_by(LoggingInfo.status_time_order, LoggingInfo.status_time)
141
80
  )
142
81
  rows = await self.conn.execute(stmt)
143
82
 
144
- values = []
83
+ values = defaultdict(list)
145
84
  for (
85
+ job_id,
146
86
  status,
147
87
  minor_status,
148
88
  application_status,
149
89
  status_time,
150
90
  status_source,
151
91
  ) in rows:
152
- values.append(
92
+ values[job_id].append(
153
93
  [
154
94
  status,
155
95
  minor_status,
@@ -161,16 +101,16 @@ class JobLoggingDB(BaseSQLDB):
161
101
 
162
102
  # If no value has been set for the application status in the first place,
163
103
  # We put this status to unknown
164
- res = []
165
- if values:
166
- if values[0][2] == "idem":
167
- values[0][2] = "Unknown"
104
+ res: dict = defaultdict(list)
105
+ for job_id, history in values.items():
106
+ if history[0][2] == "idem":
107
+ history[0][2] = "Unknown"
168
108
 
169
109
  # We replace "idem" values by the value previously stated
170
- for i in range(1, len(values)):
110
+ for i in range(1, len(history)):
171
111
  for j in range(3):
172
- if values[i][j] == "idem":
173
- values[i][j] = values[i - 1][j]
112
+ if history[i][j] == "idem":
113
+ history[i][j] = history[i - 1][j]
174
114
 
175
115
  # And we replace arrays with tuples
176
116
  for (
@@ -179,8 +119,8 @@ class JobLoggingDB(BaseSQLDB):
179
119
  application_status,
180
120
  status_time,
181
121
  status_source,
182
- ) in values:
183
- res.append(
122
+ ) in history:
123
+ res[job_id].append(
184
124
  JobStatusReturn(
185
125
  Status=status,
186
126
  MinorStatus=minor_status,
@@ -194,42 +134,19 @@ class JobLoggingDB(BaseSQLDB):
194
134
 
195
135
  async def delete_records(self, job_ids: list[int]):
196
136
  """Delete logging records for given jobs."""
197
- stmt = delete(LoggingInfo).where(LoggingInfo.JobID.in_(job_ids))
137
+ stmt = delete(LoggingInfo).where(LoggingInfo.job_id.in_(job_ids))
198
138
  await self.conn.execute(stmt)
199
139
 
200
- async def get_wms_time_stamps(self, job_id):
201
- """Get TimeStamps for job MajorState transitions
202
- return a {State:timestamp} dictionary.
203
- """
204
- result = {}
205
- stmt = select(
206
- LoggingInfo.Status,
207
- LoggingInfo.StatusTimeOrder,
208
- ).where(LoggingInfo.JobID == job_id)
209
- rows = await self.conn.execute(stmt)
210
- if not rows.rowcount:
211
- raise JobNotFound(job_id) from None
212
-
213
- for event, etime in rows:
214
- result[event] = str(etime + MAGIC_EPOC_NUMBER)
215
-
216
- return result
217
-
218
- async def get_wms_time_stamps_bulk(self, job_ids):
140
+ async def get_wms_time_stamps(
141
+ self, job_ids: Iterable[int]
142
+ ) -> dict[int, dict[str, datetime]]:
219
143
  """Get TimeStamps for job MajorState transitions for multiple jobs at once
220
144
  return a {JobID: {State:timestamp}} dictionary.
221
145
  """
222
- result = defaultdict(dict)
146
+ result: defaultdict[int, dict[str, datetime]] = defaultdict(dict)
223
147
  stmt = select(
224
- LoggingInfo.JobID,
225
- LoggingInfo.Status,
226
- LoggingInfo.StatusTimeOrder,
227
- ).where(LoggingInfo.JobID.in_(job_ids))
228
- rows = await self.conn.execute(stmt)
229
- if not rows.rowcount:
230
- return {}
231
-
232
- for job_id, event, etime in rows:
233
- result[job_id][event] = str(etime + MAGIC_EPOC_NUMBER)
234
-
235
- return result
148
+ LoggingInfo.job_id, LoggingInfo.status, LoggingInfo.status_time_order
149
+ ).where(LoggingInfo.job_id.in_(job_ids))
150
+ for job_id, event, etime in await self.conn.execute(stmt):
151
+ result[job_id][event] = etime
152
+ return dict(result)
@@ -1,9 +1,8 @@
1
- from sqlalchemy import (
2
- Integer,
3
- Numeric,
4
- PrimaryKeyConstraint,
5
- String,
6
- )
1
+ from __future__ import annotations
2
+
3
+ from datetime import UTC, datetime
4
+
5
+ from sqlalchemy import Integer, Numeric, PrimaryKeyConstraint, String, TypeDecorator
7
6
  from sqlalchemy.orm import declarative_base
8
7
 
9
8
  from ..utils import Column, DateNowColumn
@@ -11,15 +10,55 @@ from ..utils import Column, DateNowColumn
11
10
  JobLoggingDBBase = declarative_base()
12
11
 
13
12
 
13
+ class MagicEpochDateTime(TypeDecorator):
14
+ """A SQLAlchemy type that stores a datetime as a numeric value representing the
15
+ seconds elapsed since MAGIC_EPOC_NUMBER. The underlying column is defined as
16
+ Numeric(12,3) which provides a fixed-precision representation.
17
+ """
18
+
19
+ impl = Numeric(12, 3)
20
+ cache_ok = True
21
+
22
+ MAGIC_EPOC_NUMBER = 1270000000
23
+
24
+ def process_bind_param(self, value, dialect):
25
+ """Convert a Python datetime to a numeric value: (timestamp - MAGIC_EPOC_NUMBER).
26
+ The result is rounded to three decimal places.
27
+ """
28
+ if value is None:
29
+ return None
30
+ if isinstance(value, datetime):
31
+ # Convert datetime to seconds since the Unix epoch, subtract our magic epoch,
32
+ # and round to three decimal places.
33
+ epoch_seconds = (
34
+ value.replace(tzinfo=UTC).timestamp() - self.MAGIC_EPOC_NUMBER
35
+ )
36
+ return round(epoch_seconds, 3)
37
+ raise ValueError(
38
+ "Expected a datetime object for MagicEpochDateTime bind parameter."
39
+ )
40
+
41
+ def process_result_value(self, value, dialect):
42
+ """Convert the numeric database value back into a Python datetime by reversing the
43
+ stored difference (adding MAGIC_EPOC_NUMBER).
44
+ """
45
+ if value is None:
46
+ return None
47
+ # Carefully convert from Decimal to datetime to avoid losing precision
48
+ value += self.MAGIC_EPOC_NUMBER
49
+ value_int = int(value)
50
+ result = datetime.fromtimestamp(value_int, tz=UTC)
51
+ return result.replace(microsecond=int((value - value_int) * 1_000_000))
52
+
53
+
14
54
  class LoggingInfo(JobLoggingDBBase):
15
55
  __tablename__ = "LoggingInfo"
16
- JobID = Column(Integer)
17
- SeqNum = Column(Integer)
18
- Status = Column(String(32), default="")
19
- MinorStatus = Column(String(128), default="")
20
- ApplicationStatus = Column(String(255), default="")
21
- StatusTime = DateNowColumn()
22
- # TODO: Check that this corresponds to the DOUBLE(12,3) type in MySQL
23
- StatusTimeOrder = Column(Numeric(precision=12, scale=3), default=0)
24
- Source = Column(String(32), default="Unknown", name="StatusSource")
56
+ job_id = Column("JobID", Integer)
57
+ seq_num = Column("SeqNum", Integer)
58
+ status = Column("Status", String(32), default="")
59
+ minor_status = Column("MinorStatus", String(128), default="")
60
+ application_status = Column("ApplicationStatus", String(255), default="")
61
+ status_time = DateNowColumn("StatusTime")
62
+ status_time_order = Column("StatusTimeOrder", MagicEpochDateTime, default=0)
63
+ source = Column("StatusSource", String(32), default="Unknown")
25
64
  __table_args__ = (PrimaryKeyConstraint("JobID", "SeqNum"),)
@@ -20,7 +20,6 @@ class PilotAgentsDB(BaseSQLDB):
20
20
  grid_type: str = "DIRAC",
21
21
  pilot_stamps: dict | None = None,
22
22
  ) -> None:
23
-
24
23
  if pilot_stamps is None:
25
24
  pilot_stamps = {}
26
25
 
@@ -1,5 +1,6 @@
1
+ from __future__ import annotations
2
+
1
3
  from sqlalchemy import (
2
- DateTime,
3
4
  Double,
4
5
  Index,
5
6
  Integer,
@@ -8,6 +9,8 @@ from sqlalchemy import (
8
9
  )
9
10
  from sqlalchemy.orm import declarative_base
10
11
 
12
+ from diracx.db.sql.utils.types import SmarterDateTime
13
+
11
14
  from ..utils import Column, EnumBackedBool, NullColumn
12
15
 
13
16
  PilotAgentsDBBase = declarative_base()
@@ -16,22 +19,22 @@ PilotAgentsDBBase = declarative_base()
16
19
  class PilotAgents(PilotAgentsDBBase):
17
20
  __tablename__ = "PilotAgents"
18
21
 
19
- PilotID = Column("PilotID", Integer, autoincrement=True, primary_key=True)
20
- InitialJobID = Column("InitialJobID", Integer, default=0)
21
- CurrentJobID = Column("CurrentJobID", Integer, default=0)
22
- PilotJobReference = Column("PilotJobReference", String(255), default="Unknown")
23
- PilotStamp = Column("PilotStamp", String(32), default="")
24
- DestinationSite = Column("DestinationSite", String(128), default="NotAssigned")
25
- Queue = Column("Queue", String(128), default="Unknown")
26
- GridSite = Column("GridSite", String(128), default="Unknown")
27
- VO = Column("VO", String(128))
28
- GridType = Column("GridType", String(32), default="LCG")
29
- BenchMark = Column("BenchMark", Double, default=0.0)
30
- SubmissionTime = NullColumn("SubmissionTime", DateTime)
31
- LastUpdateTime = NullColumn("LastUpdateTime", DateTime)
32
- Status = Column("Status", String(32), default="Unknown")
33
- StatusReason = Column("StatusReason", String(255), default="Unknown")
34
- AccountingSent = Column("AccountingSent", EnumBackedBool(), default=False)
22
+ pilot_id = Column("PilotID", Integer, autoincrement=True, primary_key=True)
23
+ initial_job_id = Column("InitialJobID", Integer, default=0)
24
+ current_job_id = Column("CurrentJobID", Integer, default=0)
25
+ pilot_job_reference = Column("PilotJobReference", String(255), default="Unknown")
26
+ pilot_stamp = Column("PilotStamp", String(32), default="")
27
+ destination_site = Column("DestinationSite", String(128), default="NotAssigned")
28
+ queue = Column("Queue", String(128), default="Unknown")
29
+ grid_site = Column("GridSite", String(128), default="Unknown")
30
+ vo = Column("VO", String(128))
31
+ grid_type = Column("GridType", String(32), default="LCG")
32
+ benchmark = Column("BenchMark", Double, default=0.0)
33
+ submission_time = NullColumn("SubmissionTime", SmarterDateTime)
34
+ last_update_time = NullColumn("LastUpdateTime", SmarterDateTime)
35
+ status = Column("Status", String(32), default="Unknown")
36
+ status_reason = Column("StatusReason", String(255), default="Unknown")
37
+ accounting_sent = Column("AccountingSent", EnumBackedBool(), default=False)
35
38
 
36
39
  __table_args__ = (
37
40
  Index("PilotJobReference", "PilotJobReference"),
@@ -43,9 +46,9 @@ class PilotAgents(PilotAgentsDBBase):
43
46
  class JobToPilotMapping(PilotAgentsDBBase):
44
47
  __tablename__ = "JobToPilotMapping"
45
48
 
46
- PilotID = Column("PilotID", Integer, primary_key=True)
47
- JobID = Column("JobID", Integer, primary_key=True)
48
- StartTime = Column("StartTime", DateTime)
49
+ pilot_id = Column("PilotID", Integer, primary_key=True)
50
+ job_id = Column("JobID", Integer, primary_key=True)
51
+ start_time = Column("StartTime", SmarterDateTime)
49
52
 
50
53
  __table_args__ = (Index("JobID", "JobID"), Index("PilotID", "PilotID"))
51
54
 
@@ -53,6 +56,6 @@ class JobToPilotMapping(PilotAgentsDBBase):
53
56
  class PilotOutput(PilotAgentsDBBase):
54
57
  __tablename__ = "PilotOutput"
55
58
 
56
- PilotID = Column("PilotID", Integer, primary_key=True)
57
- StdOutput = Column("StdOutput", Text)
58
- StdError = Column("StdError", Text)
59
+ pilot_id = Column("PilotID", Integer, primary_key=True)
60
+ std_output = Column("StdOutput", Text)
61
+ std_error = Column("StdError", Text)