diracx-db 0.0.1a46__tar.gz → 0.0.1a48__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/PKG-INFO +1 -1
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/os/job_parameters.py +9 -3
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/dummy/db.py +3 -14
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/job/db.py +27 -74
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/job/schema.py +42 -15
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/pilot_agents/schema.py +5 -4
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/utils/__init__.py +2 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/utils/base.py +94 -4
- diracx_db-0.0.1a48/src/diracx/db/sql/utils/types.py +137 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/jobs/test_job_db.py +102 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/test_dummy_db.py +1 -1
- diracx_db-0.0.1a46/src/diracx/db/sql/utils/types.py +0 -43
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/.gitignore +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/README.md +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/pyproject.toml +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/__main__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/exceptions.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/os/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/os/utils.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/py.typed +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/auth/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/auth/db.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/auth/schema.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/dummy/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/dummy/schema.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/job/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/job_logging/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/job_logging/db.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/job_logging/schema.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/pilot_agents/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/pilot_agents/db.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/sandbox_metadata/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/sandbox_metadata/db.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/sandbox_metadata/schema.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/task_queue/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/task_queue/db.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/task_queue/schema.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/src/diracx/db/sql/utils/functions.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/auth/test_authorization_flow.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/auth/test_device_flow.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/auth/test_refresh_token.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/jobs/test_job_logging_db.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/jobs/test_sandbox_metadata.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/opensearch/test_connection.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/opensearch/test_index_template.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/opensearch/test_search.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/pilot_agents/__init__.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/pilot_agents/test_pilot_agents_db.py +0 -0
- {diracx_db-0.0.1a46 → diracx_db-0.0.1a48}/tests/test_freeze_time.py +0 -0
@@ -9,13 +9,19 @@ class JobParametersDB(BaseOSDB):
|
|
9
9
|
fields = {
|
10
10
|
"JobID": {"type": "long"},
|
11
11
|
"timestamp": {"type": "date"},
|
12
|
+
"PilotAgent": {"type": "keyword"},
|
13
|
+
"Pilot_Reference": {"type": "keyword"},
|
14
|
+
"JobGroup": {"type": "keyword"},
|
12
15
|
"CPUNormalizationFactor": {"type": "long"},
|
13
16
|
"NormCPUTime(s)": {"type": "long"},
|
14
|
-
"Memory(
|
17
|
+
"Memory(MB)": {"type": "long"},
|
18
|
+
"LocalAccount": {"type": "keyword"},
|
15
19
|
"TotalCPUTime(s)": {"type": "long"},
|
16
|
-
"
|
17
|
-
"HostName": {"type": "
|
20
|
+
"PayloadPID": {"type": "long"},
|
21
|
+
"HostName": {"type": "text"},
|
18
22
|
"GridCE": {"type": "keyword"},
|
23
|
+
"CEQueue": {"type": "keyword"},
|
24
|
+
"BatchSystem": {"type": "keyword"},
|
19
25
|
"ModelName": {"type": "keyword"},
|
20
26
|
"Status": {"type": "keyword"},
|
21
27
|
"JobType": {"type": "keyword"},
|
@@ -1,9 +1,9 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from sqlalchemy import func, insert, select
|
3
|
+
from sqlalchemy import insert
|
4
4
|
from uuid_utils import UUID
|
5
5
|
|
6
|
-
from diracx.db.sql.utils import BaseSQLDB, apply_search_filters
|
6
|
+
from diracx.db.sql.utils import BaseSQLDB
|
7
7
|
|
8
8
|
from .schema import Base as DummyDBBase
|
9
9
|
from .schema import Cars, Owners
|
@@ -22,18 +22,7 @@ class DummyDB(BaseSQLDB):
|
|
22
22
|
metadata = DummyDBBase.metadata
|
23
23
|
|
24
24
|
async def summary(self, group_by, search) -> list[dict[str, str | int]]:
|
25
|
-
|
26
|
-
|
27
|
-
stmt = select(*columns, func.count(Cars.license_plate).label("count"))
|
28
|
-
stmt = apply_search_filters(Cars.__table__.columns.__getitem__, stmt, search)
|
29
|
-
stmt = stmt.group_by(*columns)
|
30
|
-
|
31
|
-
# Execute the query
|
32
|
-
return [
|
33
|
-
dict(row._mapping)
|
34
|
-
async for row in (await self.conn.stream(stmt))
|
35
|
-
if row.count > 0 # type: ignore
|
36
|
-
]
|
25
|
+
return await self._summary(Cars, group_by, search)
|
37
26
|
|
38
27
|
async def insert_owner(self, name: str) -> int:
|
39
28
|
stmt = insert(Owners).values(name=name)
|
@@ -5,15 +5,16 @@ __all__ = ["JobDB"]
|
|
5
5
|
from datetime import datetime, timezone
|
6
6
|
from typing import TYPE_CHECKING, Any, Iterable
|
7
7
|
|
8
|
-
from sqlalchemy import bindparam, case, delete, func, insert, select, update
|
8
|
+
from sqlalchemy import bindparam, case, delete, literal, select, update
|
9
9
|
|
10
10
|
if TYPE_CHECKING:
|
11
11
|
from sqlalchemy.sql.elements import BindParameter
|
12
|
+
from sqlalchemy.sql import expression
|
12
13
|
|
13
14
|
from diracx.core.exceptions import InvalidQueryError
|
14
15
|
from diracx.core.models import JobCommand, SearchSpec, SortSpec
|
15
16
|
|
16
|
-
from ..utils import BaseSQLDB, apply_search_filters, apply_sort_constraints
|
17
|
+
from ..utils import BaseSQLDB, _get_columns
|
17
18
|
from ..utils.functions import utcnow
|
18
19
|
from .schema import (
|
19
20
|
HeartBeatLoggingInfo,
|
@@ -25,17 +26,6 @@ from .schema import (
|
|
25
26
|
)
|
26
27
|
|
27
28
|
|
28
|
-
def _get_columns(table, parameters):
|
29
|
-
columns = [x for x in table.columns]
|
30
|
-
if parameters:
|
31
|
-
if unrecognised_parameters := set(parameters) - set(table.columns.keys()):
|
32
|
-
raise InvalidQueryError(
|
33
|
-
f"Unrecognised parameters requested {unrecognised_parameters}"
|
34
|
-
)
|
35
|
-
columns = [c for c in columns if c.name in parameters]
|
36
|
-
return columns
|
37
|
-
|
38
|
-
|
39
29
|
class JobDB(BaseSQLDB):
|
40
30
|
metadata = JobDBBase.metadata
|
41
31
|
|
@@ -54,20 +44,11 @@ class JobDB(BaseSQLDB):
|
|
54
44
|
# to find a way to make it dynamic
|
55
45
|
jdl_2_db_parameters = ["JobName", "JobType", "JobGroup"]
|
56
46
|
|
57
|
-
async def summary(
|
47
|
+
async def summary(
|
48
|
+
self, group_by: list[str], search: list[SearchSpec]
|
49
|
+
) -> list[dict[str, str | int]]:
|
58
50
|
"""Get a summary of the jobs."""
|
59
|
-
|
60
|
-
|
61
|
-
stmt = select(*columns, func.count(Jobs.job_id).label("count"))
|
62
|
-
stmt = apply_search_filters(Jobs.__table__.columns.__getitem__, stmt, search)
|
63
|
-
stmt = stmt.group_by(*columns)
|
64
|
-
|
65
|
-
# Execute the query
|
66
|
-
return [
|
67
|
-
dict(row._mapping)
|
68
|
-
async for row in (await self.conn.stream(stmt))
|
69
|
-
if row.count > 0 # type: ignore
|
70
|
-
]
|
51
|
+
return await self._summary(table=Jobs, group_by=group_by, search=search)
|
71
52
|
|
72
53
|
async def search(
|
73
54
|
self,
|
@@ -80,34 +61,15 @@ class JobDB(BaseSQLDB):
|
|
80
61
|
page: int | None = None,
|
81
62
|
) -> tuple[int, list[dict[Any, Any]]]:
|
82
63
|
"""Search for jobs in the database."""
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
stmt = stmt.distinct()
|
93
|
-
|
94
|
-
# Calculate total count before applying pagination
|
95
|
-
total_count_subquery = stmt.alias()
|
96
|
-
total_count_stmt = select(func.count()).select_from(total_count_subquery)
|
97
|
-
total = (await self.conn.execute(total_count_stmt)).scalar_one()
|
98
|
-
|
99
|
-
# Apply pagination
|
100
|
-
if page is not None:
|
101
|
-
if page < 1:
|
102
|
-
raise InvalidQueryError("Page must be a positive integer")
|
103
|
-
if per_page < 1:
|
104
|
-
raise InvalidQueryError("Per page must be a positive integer")
|
105
|
-
stmt = stmt.offset((page - 1) * per_page).limit(per_page)
|
106
|
-
|
107
|
-
# Execute the query
|
108
|
-
return total, [
|
109
|
-
dict(row._mapping) async for row in (await self.conn.stream(stmt))
|
110
|
-
]
|
64
|
+
return await self._search(
|
65
|
+
table=Jobs,
|
66
|
+
parameters=parameters,
|
67
|
+
search=search,
|
68
|
+
sorts=sorts,
|
69
|
+
distinct=distinct,
|
70
|
+
per_page=per_page,
|
71
|
+
page=page,
|
72
|
+
)
|
111
73
|
|
112
74
|
async def create_job(self, compressed_original_jdl: str):
|
113
75
|
"""Used to insert a new job with original JDL. Returns inserted job id."""
|
@@ -167,27 +129,14 @@ class JobDB(BaseSQLDB):
|
|
167
129
|
],
|
168
130
|
)
|
169
131
|
|
170
|
-
@staticmethod
|
171
|
-
def _set_job_attributes_fix_value(column, value):
|
172
|
-
"""Apply corrections to the values before inserting them into the database.
|
173
|
-
|
174
|
-
TODO: Move this logic into the sqlalchemy model.
|
175
|
-
"""
|
176
|
-
if column == "VerifiedFlag":
|
177
|
-
value_str = str(value)
|
178
|
-
if value_str in ("True", "False"):
|
179
|
-
return value_str
|
180
|
-
if column == "AccountedFlag":
|
181
|
-
value_str = str(value)
|
182
|
-
if value_str in ("True", "False", "Failed"):
|
183
|
-
return value_str
|
184
|
-
else:
|
185
|
-
return value
|
186
|
-
raise NotImplementedError(f"Unrecognized value for column {column}: {value}")
|
187
|
-
|
188
132
|
async def set_job_attributes(self, job_data):
|
189
133
|
"""Update the parameters of the given jobs."""
|
190
134
|
# TODO: add myDate and force parameters.
|
135
|
+
|
136
|
+
if not job_data:
|
137
|
+
# nothing to do!
|
138
|
+
raise ValueError("job_data is empty")
|
139
|
+
|
191
140
|
for job_id in job_data.keys():
|
192
141
|
if "Status" in job_data[job_id]:
|
193
142
|
job_data[job_id].update(
|
@@ -199,7 +148,11 @@ class JobDB(BaseSQLDB):
|
|
199
148
|
*[
|
200
149
|
(
|
201
150
|
Jobs.__table__.c.JobID == job_id,
|
202
|
-
|
151
|
+
# Since the setting of the new column value is obscured by the CASE statement,
|
152
|
+
# ensure that SQLAlchemy renders the new column value with the correct type
|
153
|
+
literal(attrs[column], type_=Jobs.__table__.c[column].type)
|
154
|
+
if not isinstance(attrs[column], expression.FunctionElement)
|
155
|
+
else attrs[column],
|
203
156
|
)
|
204
157
|
for job_id, attrs in job_data.items()
|
205
158
|
if column in attrs
|
@@ -232,7 +185,7 @@ class JobDB(BaseSQLDB):
|
|
232
185
|
async def set_job_commands(self, commands: list[tuple[int, str, str]]) -> None:
|
233
186
|
"""Store a command to be passed to the job together with the next heart beat."""
|
234
187
|
await self.conn.execute(
|
235
|
-
insert(JobCommands),
|
188
|
+
JobCommands.__table__.insert(),
|
236
189
|
[
|
237
190
|
{
|
238
191
|
"JobID": job_id,
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import sqlalchemy.types as types
|
4
4
|
from sqlalchemy import (
|
5
|
-
DateTime,
|
6
5
|
ForeignKey,
|
7
6
|
Index,
|
8
7
|
Integer,
|
@@ -11,6 +10,8 @@ from sqlalchemy import (
|
|
11
10
|
)
|
12
11
|
from sqlalchemy.orm import declarative_base
|
13
12
|
|
13
|
+
from diracx.db.sql.utils.types import SmarterDateTime
|
14
|
+
|
14
15
|
from ..utils import Column, EnumBackedBool, NullColumn
|
15
16
|
|
16
17
|
JobDBBase = declarative_base()
|
@@ -19,11 +20,8 @@ JobDBBase = declarative_base()
|
|
19
20
|
class AccountedFlagEnum(types.TypeDecorator):
|
20
21
|
"""Maps a ``AccountedFlagEnum()`` column to True/False in Python."""
|
21
22
|
|
22
|
-
impl = types.Enum
|
23
|
-
cache_ok
|
24
|
-
|
25
|
-
def __init__(self) -> None:
|
26
|
-
super().__init__("True", "False", "Failed")
|
23
|
+
impl = types.Enum("True", "False", "Failed", name="accounted_flag_enum")
|
24
|
+
cache_ok = True
|
27
25
|
|
28
26
|
def process_bind_param(self, value, dialect) -> str:
|
29
27
|
if value is True:
|
@@ -63,12 +61,30 @@ class Jobs(JobDBBase):
|
|
63
61
|
owner = Column("Owner", String(64), default="Unknown")
|
64
62
|
owner_group = Column("OwnerGroup", String(128), default="Unknown")
|
65
63
|
vo = Column("VO", String(32))
|
66
|
-
submission_time = NullColumn(
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
64
|
+
submission_time = NullColumn(
|
65
|
+
"SubmissionTime",
|
66
|
+
SmarterDateTime(),
|
67
|
+
)
|
68
|
+
reschedule_time = NullColumn(
|
69
|
+
"RescheduleTime",
|
70
|
+
SmarterDateTime(),
|
71
|
+
)
|
72
|
+
last_update_time = NullColumn(
|
73
|
+
"LastUpdateTime",
|
74
|
+
SmarterDateTime(),
|
75
|
+
)
|
76
|
+
start_exec_time = NullColumn(
|
77
|
+
"StartExecTime",
|
78
|
+
SmarterDateTime(),
|
79
|
+
)
|
80
|
+
heart_beat_time = NullColumn(
|
81
|
+
"HeartBeatTime",
|
82
|
+
SmarterDateTime(),
|
83
|
+
)
|
84
|
+
end_exec_time = NullColumn(
|
85
|
+
"EndExecTime",
|
86
|
+
SmarterDateTime(),
|
87
|
+
)
|
72
88
|
status = Column("Status", String(32), default="Received")
|
73
89
|
minor_status = Column("MinorStatus", String(128), default="Unknown")
|
74
90
|
application_status = Column("ApplicationStatus", String(255), default="Unknown")
|
@@ -143,7 +159,11 @@ class HeartBeatLoggingInfo(JobDBBase):
|
|
143
159
|
)
|
144
160
|
name = Column("Name", String(100), primary_key=True)
|
145
161
|
value = Column("Value", Text)
|
146
|
-
heart_beat_time = Column("HeartBeatTime", DateTime, primary_key=True)
|
162
|
+
heart_beat_time = Column(
|
163
|
+
"HeartBeatTime",
|
164
|
+
SmarterDateTime(),
|
165
|
+
primary_key=True,
|
166
|
+
)
|
147
167
|
|
148
168
|
|
149
169
|
class JobCommands(JobDBBase):
|
@@ -154,5 +174,12 @@ class JobCommands(JobDBBase):
|
|
154
174
|
command = Column("Command", String(100))
|
155
175
|
arguments = Column("Arguments", String(100))
|
156
176
|
status = Column("Status", String(64), default="Received")
|
157
|
-
reception_time = Column("ReceptionTime", DateTime, primary_key=True)
|
158
|
-
|
177
|
+
reception_time = Column(
|
178
|
+
"ReceptionTime",
|
179
|
+
SmarterDateTime(),
|
180
|
+
primary_key=True,
|
181
|
+
)
|
182
|
+
execution_time = NullColumn(
|
183
|
+
"ExecutionTime",
|
184
|
+
SmarterDateTime(),
|
185
|
+
)
|
@@ -1,7 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from sqlalchemy import (
|
4
|
-
DateTime,
|
5
4
|
Double,
|
6
5
|
Index,
|
7
6
|
Integer,
|
@@ -10,6 +9,8 @@ from sqlalchemy import (
|
|
10
9
|
)
|
11
10
|
from sqlalchemy.orm import declarative_base
|
12
11
|
|
12
|
+
from diracx.db.sql.utils.types import SmarterDateTime
|
13
|
+
|
13
14
|
from ..utils import Column, EnumBackedBool, NullColumn
|
14
15
|
|
15
16
|
PilotAgentsDBBase = declarative_base()
|
@@ -29,8 +30,8 @@ class PilotAgents(PilotAgentsDBBase):
|
|
29
30
|
vo = Column("VO", String(128))
|
30
31
|
grid_type = Column("GridType", String(32), default="LCG")
|
31
32
|
benchmark = Column("BenchMark", Double, default=0.0)
|
32
|
-
submission_time = NullColumn("SubmissionTime", DateTime)
|
33
|
-
last_update_time = NullColumn("LastUpdateTime", DateTime)
|
33
|
+
submission_time = NullColumn("SubmissionTime", SmarterDateTime)
|
34
|
+
last_update_time = NullColumn("LastUpdateTime", SmarterDateTime)
|
34
35
|
status = Column("Status", String(32), default="Unknown")
|
35
36
|
status_reason = Column("StatusReason", String(255), default="Unknown")
|
36
37
|
accounting_sent = Column("AccountingSent", EnumBackedBool(), default=False)
|
@@ -47,7 +48,7 @@ class JobToPilotMapping(PilotAgentsDBBase):
|
|
47
48
|
|
48
49
|
pilot_id = Column("PilotID", Integer, primary_key=True)
|
49
50
|
job_id = Column("JobID", Integer, primary_key=True)
|
50
|
-
start_time = Column("StartTime", DateTime)
|
51
|
+
start_time = Column("StartTime", SmarterDateTime)
|
51
52
|
|
52
53
|
__table_args__ = (Index("JobID", "JobID"), Index("PilotID", "PilotID"))
|
53
54
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3
3
|
from .base import (
|
4
4
|
BaseSQLDB,
|
5
5
|
SQLDBUnavailableError,
|
6
|
+
_get_columns,
|
6
7
|
apply_search_filters,
|
7
8
|
apply_sort_constraints,
|
8
9
|
)
|
@@ -10,6 +11,7 @@ from .functions import hash, substract_date, utcnow
|
|
10
11
|
from .types import Column, DateNowColumn, EnumBackedBool, EnumColumn, NullColumn
|
11
12
|
|
12
13
|
__all__ = (
|
14
|
+
"_get_columns",
|
13
15
|
"utcnow",
|
14
16
|
"Column",
|
15
17
|
"NullColumn",
|
@@ -8,18 +8,23 @@ from abc import ABCMeta
|
|
8
8
|
from collections.abc import AsyncIterator
|
9
9
|
from contextvars import ContextVar
|
10
10
|
from datetime import datetime
|
11
|
-
from typing import Self, cast
|
11
|
+
from typing import Any, Self, cast
|
12
12
|
|
13
13
|
from pydantic import TypeAdapter
|
14
|
-
from sqlalchemy import DateTime, MetaData, select
|
14
|
+
from sqlalchemy import DateTime, MetaData, func, select
|
15
15
|
from sqlalchemy.exc import OperationalError
|
16
16
|
from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine, create_async_engine
|
17
17
|
|
18
18
|
from diracx.core.exceptions import InvalidQueryError
|
19
19
|
from diracx.core.extensions import select_from_extension
|
20
|
-
from diracx.core.models import
|
20
|
+
from diracx.core.models import (
|
21
|
+
SearchSpec,
|
22
|
+
SortDirection,
|
23
|
+
SortSpec,
|
24
|
+
)
|
21
25
|
from diracx.core.settings import SqlalchemyDsn
|
22
26
|
from diracx.db.exceptions import DBUnavailableError
|
27
|
+
from diracx.db.sql.utils.types import SmarterDateTime
|
23
28
|
|
24
29
|
from .functions import date_trunc
|
25
30
|
|
@@ -227,6 +232,71 @@ class BaseSQLDB(metaclass=ABCMeta):
|
|
227
232
|
except OperationalError as e:
|
228
233
|
raise SQLDBUnavailableError("Cannot ping the DB") from e
|
229
234
|
|
235
|
+
async def _search(
|
236
|
+
self,
|
237
|
+
table: Any,
|
238
|
+
parameters: list[str] | None,
|
239
|
+
search: list[SearchSpec],
|
240
|
+
sorts: list[SortSpec],
|
241
|
+
*,
|
242
|
+
distinct: bool = False,
|
243
|
+
per_page: int = 100,
|
244
|
+
page: int | None = None,
|
245
|
+
) -> tuple[int, list[dict[str, Any]]]:
|
246
|
+
"""Search for elements in a table."""
|
247
|
+
# Find which columns to select
|
248
|
+
columns = _get_columns(table.__table__, parameters)
|
249
|
+
|
250
|
+
stmt = select(*columns)
|
251
|
+
|
252
|
+
stmt = apply_search_filters(table.__table__.columns.__getitem__, stmt, search)
|
253
|
+
stmt = apply_sort_constraints(table.__table__.columns.__getitem__, stmt, sorts)
|
254
|
+
|
255
|
+
if distinct:
|
256
|
+
stmt = stmt.distinct()
|
257
|
+
|
258
|
+
# Calculate total count before applying pagination
|
259
|
+
total_count_subquery = stmt.alias()
|
260
|
+
total_count_stmt = select(func.count()).select_from(total_count_subquery)
|
261
|
+
total = (await self.conn.execute(total_count_stmt)).scalar_one()
|
262
|
+
|
263
|
+
# Apply pagination
|
264
|
+
if page is not None:
|
265
|
+
if page < 1:
|
266
|
+
raise InvalidQueryError("Page must be a positive integer")
|
267
|
+
if per_page < 1:
|
268
|
+
raise InvalidQueryError("Per page must be a positive integer")
|
269
|
+
stmt = stmt.offset((page - 1) * per_page).limit(per_page)
|
270
|
+
|
271
|
+
# Execute the query
|
272
|
+
return total, [
|
273
|
+
dict(row._mapping) async for row in (await self.conn.stream(stmt))
|
274
|
+
]
|
275
|
+
|
276
|
+
async def _summary(
|
277
|
+
self, table: Any, group_by: list[str], search: list[SearchSpec]
|
278
|
+
) -> list[dict[str, str | int]]:
|
279
|
+
"""Get a summary of the elements of a table."""
|
280
|
+
columns = _get_columns(table.__table__, group_by)
|
281
|
+
|
282
|
+
pk_columns = list(table.__table__.primary_key.columns)
|
283
|
+
if not pk_columns:
|
284
|
+
raise ValueError(
|
285
|
+
"Model has no primary key and no count_column was provided."
|
286
|
+
)
|
287
|
+
count_col = pk_columns[0]
|
288
|
+
|
289
|
+
stmt = select(*columns, func.count(count_col).label("count"))
|
290
|
+
stmt = apply_search_filters(table.__table__.columns.__getitem__, stmt, search)
|
291
|
+
stmt = stmt.group_by(*columns)
|
292
|
+
|
293
|
+
# Execute the query
|
294
|
+
return [
|
295
|
+
dict(row._mapping)
|
296
|
+
async for row in (await self.conn.stream(stmt))
|
297
|
+
if row.count > 0 # type: ignore
|
298
|
+
]
|
299
|
+
|
230
300
|
|
231
301
|
def find_time_resolution(value):
|
232
302
|
if isinstance(value, datetime):
|
@@ -258,6 +328,17 @@ def find_time_resolution(value):
|
|
258
328
|
raise InvalidQueryError(f"Cannot parse {value=}")
|
259
329
|
|
260
330
|
|
331
|
+
def _get_columns(table, parameters):
|
332
|
+
columns = [x for x in table.columns]
|
333
|
+
if parameters:
|
334
|
+
if unrecognised_parameters := set(parameters) - set(table.columns.keys()):
|
335
|
+
raise InvalidQueryError(
|
336
|
+
f"Unrecognised parameters requested {unrecognised_parameters}"
|
337
|
+
)
|
338
|
+
columns = [c for c in columns if c.name in parameters]
|
339
|
+
return columns
|
340
|
+
|
341
|
+
|
261
342
|
def apply_search_filters(column_mapping, stmt, search):
|
262
343
|
for query in search:
|
263
344
|
try:
|
@@ -265,7 +346,7 @@ def apply_search_filters(column_mapping, stmt, search):
|
|
265
346
|
except KeyError as e:
|
266
347
|
raise InvalidQueryError(f"Unknown column {query['parameter']}") from e
|
267
348
|
|
268
|
-
if isinstance(column.type, DateTime):
|
349
|
+
if isinstance(column.type, (DateTime, SmarterDateTime)):
|
269
350
|
if "value" in query and isinstance(query["value"], str):
|
270
351
|
resolution, value = find_time_resolution(query["value"])
|
271
352
|
if resolution:
|
@@ -300,6 +381,15 @@ def apply_search_filters(column_mapping, stmt, search):
|
|
300
381
|
expr = column.like(query["value"])
|
301
382
|
elif query["operator"] in "ilike":
|
302
383
|
expr = column.ilike(query["value"])
|
384
|
+
elif query["operator"] == "not like":
|
385
|
+
expr = column.not_like(query["value"])
|
386
|
+
elif query["operator"] == "regex":
|
387
|
+
# We check the regex validity here
|
388
|
+
try:
|
389
|
+
re.compile(query["value"])
|
390
|
+
except re.error as e:
|
391
|
+
raise InvalidQueryError(f"Invalid regex {query['value']}") from e
|
392
|
+
expr = column.regexp_match(query["value"])
|
303
393
|
else:
|
304
394
|
raise InvalidQueryError(f"Unknown filter {query=}")
|
305
395
|
stmt = stmt.where(expr)
|
@@ -0,0 +1,137 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from datetime import datetime
|
4
|
+
from functools import partial
|
5
|
+
from zoneinfo import ZoneInfo
|
6
|
+
|
7
|
+
import sqlalchemy.types as types
|
8
|
+
from sqlalchemy import Column as RawColumn
|
9
|
+
from sqlalchemy import DateTime, Enum
|
10
|
+
|
11
|
+
from .functions import utcnow
|
12
|
+
|
13
|
+
Column: partial[RawColumn] = partial(RawColumn, nullable=False)
|
14
|
+
NullColumn: partial[RawColumn] = partial(RawColumn, nullable=True)
|
15
|
+
DateNowColumn = partial(Column, type_=DateTime(timezone=True), server_default=utcnow())
|
16
|
+
|
17
|
+
|
18
|
+
def EnumColumn(name, enum_type, **kwargs): # noqa: N802
|
19
|
+
return Column(name, Enum(enum_type, native_enum=False, length=16), **kwargs)
|
20
|
+
|
21
|
+
|
22
|
+
class EnumBackedBool(types.TypeDecorator):
|
23
|
+
"""Maps a ``EnumBackedBool()`` column to True/False in Python."""
|
24
|
+
|
25
|
+
impl = types.Enum("True", "False", name="enum_backed_bool")
|
26
|
+
cache_ok = True
|
27
|
+
|
28
|
+
def process_bind_param(self, value, dialect) -> str:
|
29
|
+
if value is True:
|
30
|
+
return "True"
|
31
|
+
elif value is False:
|
32
|
+
return "False"
|
33
|
+
else:
|
34
|
+
raise NotImplementedError(value, dialect)
|
35
|
+
|
36
|
+
def process_result_value(self, value, dialect) -> bool:
|
37
|
+
if value == "True":
|
38
|
+
return True
|
39
|
+
elif value == "False":
|
40
|
+
return False
|
41
|
+
else:
|
42
|
+
raise NotImplementedError(f"Unknown {value=}")
|
43
|
+
|
44
|
+
|
45
|
+
class SmarterDateTime(types.TypeDecorator):
|
46
|
+
"""A DateTime type that also accepts ISO8601 strings.
|
47
|
+
|
48
|
+
Takes into account converting timezone aware datetime objects into
|
49
|
+
naive form and back when needed.
|
50
|
+
|
51
|
+
"""
|
52
|
+
|
53
|
+
impl = DateTime()
|
54
|
+
cache_ok = True
|
55
|
+
|
56
|
+
def __init__(
|
57
|
+
self,
|
58
|
+
stored_tz: ZoneInfo | None = ZoneInfo("UTC"),
|
59
|
+
returned_tz: ZoneInfo = ZoneInfo("UTC"),
|
60
|
+
stored_naive_sqlite=True,
|
61
|
+
stored_naive_mysql=True,
|
62
|
+
stored_naive_postgres=False, # Forces timezone-awareness
|
63
|
+
):
|
64
|
+
self._stored_naive_dialect = {
|
65
|
+
"sqlite": stored_naive_sqlite,
|
66
|
+
"mysql": stored_naive_mysql,
|
67
|
+
"postgres": stored_naive_postgres,
|
68
|
+
}
|
69
|
+
self._stored_tz: ZoneInfo | None = stored_tz # None = Local timezone
|
70
|
+
self._returned_tz: ZoneInfo = returned_tz
|
71
|
+
|
72
|
+
def _stored_naive(self, dialect):
|
73
|
+
if dialect.name not in self._stored_naive_dialect:
|
74
|
+
raise NotImplementedError(dialect.name)
|
75
|
+
return self._stored_naive_dialect.get(dialect.name)
|
76
|
+
|
77
|
+
def process_bind_param(self, value, dialect):
|
78
|
+
if value is None:
|
79
|
+
return None
|
80
|
+
|
81
|
+
if isinstance(value, str):
|
82
|
+
try:
|
83
|
+
value: datetime = datetime.fromisoformat(value)
|
84
|
+
except ValueError as err:
|
85
|
+
raise ValueError(f"Unable to parse datetime string: {value}") from err
|
86
|
+
|
87
|
+
if not isinstance(value, datetime):
|
88
|
+
raise ValueError(f"Expected datetime or ISO8601 string, but got {value!r}")
|
89
|
+
|
90
|
+
if not value.tzinfo:
|
91
|
+
raise ValueError(
|
92
|
+
f"Provided timestamp {value=} has no tzinfo -"
|
93
|
+
" this is problematic and may cause inconsistencies in stored timestamps.\n"
|
94
|
+
" Please always work with tz-aware datetimes / attach tzinfo to your datetime objects:"
|
95
|
+
" e.g. datetime.now(tz=timezone.utc) or use datetime_obj.astimezone() with no arguments if you need to "
|
96
|
+
"attach the local timezone to a local naive timestamp."
|
97
|
+
)
|
98
|
+
|
99
|
+
# Check that we need to convert the timezone to match self._stored_tz timezone:
|
100
|
+
if self._stored_naive(dialect):
|
101
|
+
# if self._stored_tz is None, we use our local/system timezone.
|
102
|
+
stored_tz = self._stored_tz
|
103
|
+
|
104
|
+
# astimezone converts to the stored timezone (local timezone if None)
|
105
|
+
# replace strips the TZ info --> naive datetime object
|
106
|
+
value = value.astimezone(tz=stored_tz).replace(tzinfo=None)
|
107
|
+
|
108
|
+
return value
|
109
|
+
|
110
|
+
def process_result_value(self, value, dialect):
|
111
|
+
if value is None:
|
112
|
+
return None
|
113
|
+
if not isinstance(value, datetime):
|
114
|
+
raise NotImplementedError(f"{value=} not a datetime object")
|
115
|
+
|
116
|
+
if self._stored_naive(dialect):
|
117
|
+
# Here we add back the tzinfo to the naive timestamp
|
118
|
+
# from the DB to make it aware again.
|
119
|
+
if value.tzinfo is None:
|
120
|
+
# we are definitely given a naive timestamp, so handle it.
|
121
|
+
# add back the timezone info if stored_tz is set
|
122
|
+
if self._stored_tz:
|
123
|
+
value = value.replace(tzinfo=self._stored_tz)
|
124
|
+
else:
|
125
|
+
# if stored as a local time, add back the system timezone info...
|
126
|
+
value = value.astimezone()
|
127
|
+
else:
|
128
|
+
raise ValueError(
|
129
|
+
f"stored_naive is True for {dialect.name=}, but the database engine returned "
|
130
|
+
"a tz-aware datetime. You need to check the SQLAlchemy model is consistent with the DB schema."
|
131
|
+
)
|
132
|
+
|
133
|
+
# finally, convert the datetime according to the "returned_tz"
|
134
|
+
value = value.astimezone(self._returned_tz)
|
135
|
+
|
136
|
+
# phew...
|
137
|
+
return value
|
@@ -1,6 +1,10 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
from datetime import datetime
|
4
|
+
from zoneinfo import ZoneInfo
|
5
|
+
|
3
6
|
import pytest
|
7
|
+
import sqlalchemy
|
4
8
|
from sqlalchemy.exc import IntegrityError
|
5
9
|
|
6
10
|
from diracx.core.exceptions import InvalidQueryError
|
@@ -47,6 +51,55 @@ async def populated_job_db(job_db):
|
|
47
51
|
yield job_db
|
48
52
|
|
49
53
|
|
54
|
+
async def test_bad_naive_datetime_used(populated_job_db):
|
55
|
+
async with populated_job_db as db:
|
56
|
+
compressed_jdl = "CompressedJDL0001111BadJob"
|
57
|
+
job_id = await db.create_job(compressed_jdl)
|
58
|
+
jobs_to_insert = {}
|
59
|
+
jobs_to_insert[job_id] = {
|
60
|
+
"JobID": job_id,
|
61
|
+
"Status": "New",
|
62
|
+
"Owner": "owner0101010101",
|
63
|
+
"OwnerGroup": "owner_group1",
|
64
|
+
"VO": "lhcb",
|
65
|
+
"HeartBeatTime": datetime.now().replace(tzinfo=None), # noqa
|
66
|
+
}
|
67
|
+
with pytest.raises(sqlalchemy.exc.StatementError):
|
68
|
+
await db.insert_job_attributes(jobs_to_insert) # should complain
|
69
|
+
|
70
|
+
|
71
|
+
async def test_timezone_converted_back_to_utc(populated_job_db):
|
72
|
+
current_utc_dt = datetime.now(tz=ZoneInfo("UTC"))
|
73
|
+
|
74
|
+
async with populated_job_db as db:
|
75
|
+
compressed_jdl = "CompressedJDL0001111BadJob"
|
76
|
+
job_id = await db.create_job(compressed_jdl)
|
77
|
+
jobs_to_insert = {}
|
78
|
+
jobs_to_insert[job_id] = {
|
79
|
+
"JobID": job_id,
|
80
|
+
"Status": "New",
|
81
|
+
"Owner": "owner0101010101",
|
82
|
+
"OwnerGroup": "owner_group1",
|
83
|
+
"VO": "lhcb",
|
84
|
+
"HeartBeatTime": current_utc_dt.astimezone(ZoneInfo("Asia/Tokyo")),
|
85
|
+
}
|
86
|
+
await db.insert_job_attributes(jobs_to_insert)
|
87
|
+
|
88
|
+
total, result = await db.search(
|
89
|
+
["JobID", "HeartBeatTime"],
|
90
|
+
[
|
91
|
+
ScalarSearchSpec(
|
92
|
+
parameter="JobID",
|
93
|
+
operator=ScalarSearchOperator.EQUAL,
|
94
|
+
value=int(job_id),
|
95
|
+
)
|
96
|
+
],
|
97
|
+
[],
|
98
|
+
)
|
99
|
+
assert total == 1
|
100
|
+
assert result[0]["HeartBeatTime"] == current_utc_dt, result
|
101
|
+
|
102
|
+
|
50
103
|
async def test_search_parameters(populated_job_db):
|
51
104
|
"""Test that we can search specific parameters for jobs in the database."""
|
52
105
|
async with populated_job_db as job_db:
|
@@ -187,6 +240,55 @@ async def test_search_conditions(populated_job_db):
|
|
187
240
|
assert total == 0
|
188
241
|
assert not result
|
189
242
|
|
243
|
+
# Search for a specific scalar condition: Owner not like 'owner1%'
|
244
|
+
condition = ScalarSearchSpec(
|
245
|
+
parameter="Owner", operator=ScalarSearchOperator.NOT_LIKE, value="owner1%"
|
246
|
+
)
|
247
|
+
total, result = await job_db.search([], [condition], [])
|
248
|
+
assert total == 100 - 11
|
249
|
+
assert result
|
250
|
+
assert len(result) == 100 - 11
|
251
|
+
assert all(not r["Owner"].startswith("owner1") for r in result)
|
252
|
+
|
253
|
+
# Search for a specific scalar condition: OwnerGroup not like 'owner_group2'
|
254
|
+
condition = ScalarSearchSpec(
|
255
|
+
parameter="OwnerGroup",
|
256
|
+
operator=ScalarSearchOperator.NOT_LIKE,
|
257
|
+
value="owner_group2",
|
258
|
+
)
|
259
|
+
total, result = await job_db.search([], [condition], [])
|
260
|
+
assert total == 100 - 50
|
261
|
+
assert result
|
262
|
+
assert len(result) == 100 - 50
|
263
|
+
assert all(not r["OwnerGroup"] == "owner_group2" for r in result)
|
264
|
+
|
265
|
+
# Search for a specific scalar condition: Owner regex '^owner\d+$'
|
266
|
+
condition = ScalarSearchSpec(
|
267
|
+
parameter="Owner", operator=ScalarSearchOperator.REGEX, value="^owner\\d+$"
|
268
|
+
)
|
269
|
+
total, result = await job_db.search([], [condition], [])
|
270
|
+
assert total == 100
|
271
|
+
assert result
|
272
|
+
assert len(result) == 100
|
273
|
+
|
274
|
+
# Search for a specific scalar condition: Owner regex 'owner[0-3]+'
|
275
|
+
# owner0, owner1, owner2, owner3 (4 jobs)
|
276
|
+
# owner10 -> owner39 (30 jobs)
|
277
|
+
condition = ScalarSearchSpec(
|
278
|
+
parameter="Owner", operator=ScalarSearchOperator.REGEX, value="owner[0-3]+"
|
279
|
+
)
|
280
|
+
total, result = await job_db.search([], [condition], [])
|
281
|
+
assert total == 34
|
282
|
+
assert result
|
283
|
+
assert len(result) == 34
|
284
|
+
|
285
|
+
# Search for a specific scalar condition: Owner regex 'owner[1-' (malformed pattern, must be rejected)
|
286
|
+
condition = ScalarSearchSpec(
|
287
|
+
parameter="Owner", operator=ScalarSearchOperator.REGEX, value="owner[1-"
|
288
|
+
)
|
289
|
+
with pytest.raises(InvalidQueryError):
|
290
|
+
await job_db.search([], [condition], [])
|
291
|
+
|
190
292
|
|
191
293
|
async def test_search_sorts(populated_job_db):
|
192
294
|
"""Test that we can search for jobs in the database and sort the results."""
|
@@ -129,7 +129,7 @@ async def test_failed_transaction(dummy_db):
|
|
129
129
|
|
130
130
|
# The connection is created when the context manager is entered
|
131
131
|
# This is our transaction
|
132
|
-
with pytest.raises(
|
132
|
+
with pytest.raises(InvalidQueryError):
|
133
133
|
async with dummy_db as dummy_db:
|
134
134
|
assert dummy_db.conn
|
135
135
|
|
@@ -1,43 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from functools import partial
|
4
|
-
|
5
|
-
import sqlalchemy.types as types
|
6
|
-
from sqlalchemy import Column as RawColumn
|
7
|
-
from sqlalchemy import DateTime, Enum
|
8
|
-
|
9
|
-
from .functions import utcnow
|
10
|
-
|
11
|
-
Column: partial[RawColumn] = partial(RawColumn, nullable=False)
|
12
|
-
NullColumn: partial[RawColumn] = partial(RawColumn, nullable=True)
|
13
|
-
DateNowColumn = partial(Column, type_=DateTime(timezone=True), server_default=utcnow())
|
14
|
-
|
15
|
-
|
16
|
-
def EnumColumn(name, enum_type, **kwargs): # noqa: N802
|
17
|
-
return Column(name, Enum(enum_type, native_enum=False, length=16), **kwargs)
|
18
|
-
|
19
|
-
|
20
|
-
class EnumBackedBool(types.TypeDecorator):
|
21
|
-
"""Maps a ``EnumBackedBool()`` column to True/False in Python."""
|
22
|
-
|
23
|
-
impl = types.Enum
|
24
|
-
cache_ok: bool = True
|
25
|
-
|
26
|
-
def __init__(self) -> None:
|
27
|
-
super().__init__("True", "False")
|
28
|
-
|
29
|
-
def process_bind_param(self, value, dialect) -> str:
|
30
|
-
if value is True:
|
31
|
-
return "True"
|
32
|
-
elif value is False:
|
33
|
-
return "False"
|
34
|
-
else:
|
35
|
-
raise NotImplementedError(value, dialect)
|
36
|
-
|
37
|
-
def process_result_value(self, value, dialect) -> bool:
|
38
|
-
if value == "True":
|
39
|
-
return True
|
40
|
-
elif value == "False":
|
41
|
-
return False
|
42
|
-
else:
|
43
|
-
raise NotImplementedError(f"Unknown {value=}")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|