mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -2
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +7 -0
- mlrun/common/model_monitoring/helpers.py +41 -4
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +13 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +10 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +24 -0
- mlrun/common/schemas/hub.py +3 -2
- mlrun/common/schemas/model_monitoring/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/constants.py +2 -2
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +53 -15
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +2 -3
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/datastore_profile.py +49 -17
- mlrun/datastore/model_provider/huggingface_provider.py +6 -2
- mlrun/datastore/model_provider/model_provider.py +2 -2
- mlrun/datastore/model_provider/openai_provider.py +2 -2
- mlrun/datastore/s3.py +15 -16
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +16 -10
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +16 -3
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +36 -12
- mlrun/db/httpdb.py +316 -101
- mlrun/db/nopdb.py +29 -11
- mlrun/errors.py +4 -2
- mlrun/execution.py +11 -12
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +37 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +67 -76
- mlrun/hub/step.py +113 -0
- mlrun/launcher/base.py +2 -1
- mlrun/launcher/local.py +2 -1
- mlrun/model.py +12 -2
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +20 -6
- mlrun/model_monitoring/applications/context.py +1 -0
- mlrun/model_monitoring/controller.py +7 -17
- mlrun/model_monitoring/db/_schedules.py +2 -16
- mlrun/model_monitoring/db/_stats.py +2 -13
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +2 -4
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +2 -1
- mlrun/model_monitoring/stream_processing.py +18 -16
- mlrun/model_monitoring/writer.py +4 -3
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -44
- mlrun/platforms/iguazio.py +1 -1
- mlrun/projects/operations.py +11 -10
- mlrun/projects/project.py +81 -82
- mlrun/run.py +4 -7
- mlrun/runtimes/__init__.py +2 -204
- mlrun/runtimes/base.py +89 -21
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +4 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/mounts.py +5 -0
- mlrun/runtimes/nuclio/__init__.py +12 -8
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +200 -32
- mlrun/runtimes/nuclio/function.py +154 -49
- mlrun/runtimes/nuclio/serving.py +55 -42
- mlrun/runtimes/pod.py +59 -10
- mlrun/secrets.py +46 -2
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +5 -5
- mlrun/serving/routers.py +3 -3
- mlrun/serving/server.py +46 -43
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +554 -207
- mlrun/serving/steps.py +1 -1
- mlrun/serving/system_steps.py +42 -33
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +89 -16
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/git.py +1 -1
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import random
|
|
16
|
+
import time
|
|
17
|
+
from collections.abc import Callable
|
|
18
|
+
from typing import Any, Optional, Union
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
import psycopg
|
|
22
|
+
import semver
|
|
23
|
+
from psycopg_pool import ConnectionPool
|
|
24
|
+
|
|
25
|
+
import mlrun.errors
|
|
26
|
+
from mlrun.config import config
|
|
27
|
+
from mlrun.model_monitoring.db.tsdb.preaggregate import PreAggregateManager
|
|
28
|
+
from mlrun.utils import logger
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class QueryResult:
    """Container for query results with field metadata.

    :param data:   Result rows, each row a tuple of column values.
    :param fields: Column names, in the same order as the values in each row.
    """

    def __init__(self, data: list[tuple], fields: list[str]):
        self.data = data
        self.fields = fields

    def __eq__(self, other):
        # Return NotImplemented instead of raising AttributeError when compared
        # against a non-QueryResult, so `qr == something_else` cleanly evaluates
        # to False via Python's reflected-comparison fallback.
        if not isinstance(other, QueryResult):
            return NotImplemented
        return self.data == other.data and self.fields == other.fields

    def __repr__(self):
        return f"QueryResult(rows={len(self.data)}, fields={self.fields})"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Statement:
    """
    A SQL statement bundled with its parameters for TimescaleDB execution.

    Pairs a SQL string with its bound parameters so a ready-to-run unit of
    work can be passed around and executed later on any cursor, including
    batch execution via ``executemany``.
    """

    def __init__(
        self,
        sql: str,
        parameters: Optional[Union[tuple, list, dict]] = None,
        execute_many: bool = False,
    ):
        """
        Create a parameterized statement.

        :param sql: SQL text with placeholders — ``%(name)s`` for named
            parameters or ``%s`` for positional ones.
        :param parameters: Values bound to the placeholders:
            a tuple/list (positional), a dict (named), or — when
            ``execute_many`` is set — a sequence of such parameter sets.
        :param execute_many: When True, ``parameters`` must be a sequence of
            parameter sets and execution uses ``cursor.executemany()``.
        """
        self.sql = sql
        self.parameters = parameters
        self.execute_many = execute_many

    def execute(self, cursor) -> None:
        """Run this statement on the given cursor."""
        if not self.execute_many:
            cursor.execute(self.sql, self.parameters)
            return
        # Batch mode: executemany() needs an iterable of parameter sets.
        if not isinstance(self.parameters, (list, tuple)):
            raise ValueError(
                "execute_many=True requires parameters to be a sequence"
            )
        cursor.executemany(self.sql, self.parameters)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class TimescaleDBConnection:
    """
    TimescaleDB connection with shared connection pool and parameterized query support.

    Lazily creates a psycopg connection pool on first use, verifies the
    TimescaleDB extension version once per instance, and retries transient
    failures (deadlocks, connection errors) with exponential backoff.
    """

    # TimescaleDB version requirements
    MIN_TIMESCALEDB_VERSION = (
        "2.7.0"  # Minimum version with finalized continuous aggregates
    )

    # Deadlock retry configuration
    MAX_DEADLOCK_RETRIES = 3  # Maximum deadlock-specific retry attempts

    def __init__(
        self,
        dsn: str,
        min_connections: int = 1,
        max_connections: int = 10,
        max_retries: int = 3,
        retry_delay: float = 1.0,
        autocommit: bool = False,
    ):
        """
        :param dsn:             PostgreSQL/TimescaleDB connection string (conninfo).
        :param min_connections: Minimum pool size.
        :param max_connections: Maximum pool size.
        :param max_retries:     Maximum retries for connection-level errors
                                (deadlocks use MAX_DEADLOCK_RETRIES instead).
        :param retry_delay:     Base delay in seconds for connection-error
                                backoff; doubled on each retry.
        :param autocommit:      If True, connections run in autocommit mode and
                                no explicit commit is issued.
        """
        self._dsn = dsn
        self._min_connections = min_connections
        self._max_connections = max_connections
        self._max_retries = max_retries
        self._retry_delay = retry_delay
        # Statements executed before every batch of main statements (e.g.
        # session setup); each item may be a SQL string or a Statement object.
        self.prefix_statements: list[Union[str, Statement]] = []
        self._autocommit = autocommit

        # Connection pools (lazy initialization)
        self._pool: Optional[ConnectionPool] = None
        self._timescaledb_version: Optional[str] = None
        self._version_checked: bool = False

    @property
    def pool(self) -> ConnectionPool:
        """Get or create the synchronous connection pool."""
        if self._pool is None:
            self._pool = ConnectionPool(
                conninfo=self._dsn,
                min_size=self._min_connections,
                max_size=self._max_connections,
                timeout=float(
                    config.model_endpoint_monitoring.tsdb.connection_pool_timeout
                ),
            )
        return self._pool

    def close(self) -> None:
        """Close the connection pool if it exists."""
        if self._pool is not None:
            self._pool.close()
            self._pool = None

    def _parse_version(self, version_string: str) -> semver.VersionInfo:
        """Parse TimescaleDB version string using semver.

        :raises mlrun.errors.MLRunRuntimeError: if the string is not a valid
            semver version.
        """
        try:
            # Handle versions like "2.22.0", "2.7.1-dev", etc.
            # semver.VersionInfo.parse handles pre-release versions automatically
            return semver.VersionInfo.parse(version_string)
        except ValueError as e:
            raise mlrun.errors.MLRunRuntimeError(
                f"Invalid TimescaleDB version format: {version_string}"
            ) from e

    def _check_timescaledb_version(self) -> None:
        """Verify (once) that the TimescaleDB extension is installed and meets
        MIN_TIMESCALEDB_VERSION.

        Caches the result in ``self._version_checked`` so subsequent calls are
        no-ops.

        :raises mlrun.errors.MLRunRuntimeError: if the version query fails, the
            extension is missing, or the installed version is too old.
        """
        if self._version_checked:
            return

        try:
            with self.pool.connection() as conn:
                with conn.cursor() as cursor:
                    # Check if TimescaleDB extension is installed
                    cursor.execute(
                        "SELECT extversion FROM pg_extension WHERE extname = %s",
                        ("timescaledb",),
                    )
                    result = cursor.fetchone()
        except psycopg.Error as e:
            raise mlrun.errors.MLRunRuntimeError(
                f"Failed to check TimescaleDB version: {e}"
            ) from e

        if not result:
            raise mlrun.errors.MLRunRuntimeError(
                "TimescaleDB extension is not installed"
            )

        self._timescaledb_version = result[0]

        # Version processing logic outside try/catch - not a database operation
        # _timescaledb_version is guaranteed to be non-None at this point
        current_version = self._parse_version(self._timescaledb_version)  # type: ignore[arg-type]
        min_version = self._parse_version(self.MIN_TIMESCALEDB_VERSION)

        if current_version < min_version:
            raise mlrun.errors.MLRunRuntimeError(
                f"TimescaleDB version {self._timescaledb_version} is not supported. "
                f"Minimum required version: {self.MIN_TIMESCALEDB_VERSION} "
                f"(required for finalized continuous aggregates)"
            )

        self._version_checked = True

    @property
    def timescaledb_version(self) -> Optional[str]:
        """Get the TimescaleDB version (triggers version check if not done)."""
        if not self._version_checked:
            self._check_timescaledb_version()
        return self._timescaledb_version

    def run(
        self,
        statements: Optional[Union[str, Statement, list[Union[str, Statement]]]] = None,
        query: Optional[Union[str, Statement]] = None,
    ) -> Optional[QueryResult]:
        """
        Execute statements and optionally return query results with deadlock-aware retry logic.

        Supports both string SQL and parameterized Statement objects.
        Uses deadlock-specific retry logic for optimal performance.

        :param statements: SQL statements to execute. Can be:
                          - str: Simple SQL string
                          - Statement: Parameterized statement
                          - list: Mix of str and Statement objects
        :param query: Optional query to execute after statements. Can be str or Statement.
        :return: QueryResult if query provided, None otherwise
        """
        # Perform version check on first use
        if not self._version_checked:
            self._check_timescaledb_version()

        # Walrus rebinds `statements` to the normalized (list) form; empty list
        # is falsy, so statement execution is skipped when nothing was passed.
        if statements := self._normalize_statements(statements):
            self._execute_with_retry(
                cursor_operation_callable=lambda cursor: self._execute_statements(
                    cursor, statements
                ),
                operation_name="statements",
            )

        # Execute query with retry logic for recoverable errors
        if query:
            return self._execute_with_retry(
                cursor_operation_callable=lambda cursor: self._execute_query(
                    cursor, query
                ),
                operation_name="query",
            )

        return None

    def _normalize_statements(
        self, statements: Optional[Union[str, Statement, list[Union[str, Statement]]]]
    ) -> list[Union[str, Statement]]:
        """Convert statements to a normalized list format."""
        if statements is None:
            return []
        return [statements] if isinstance(statements, str | Statement) else statements

    def _execute_operation(
        self,
        statements: list[Union[str, Statement]],
        query: Optional[Union[str, Statement]],
    ) -> Optional[QueryResult]:
        """Execute a single database operation (statements + optional query).

        NOTE(review): not referenced within this class — run() goes through
        _execute_with_retry instead. Also note the ordering difference: here the
        commit happens after the statements but *before* the optional query,
        whereas _execute_with_retry commits after the whole operation. Confirm
        external callers before removing or changing.
        """
        with self.pool.connection() as conn:
            conn.autocommit = self._autocommit

            with conn.cursor() as cursor:
                self._execute_statements(cursor, statements)
                if not self._autocommit:
                    conn.commit()
                return self._execute_query(cursor, query) if query else None

    def _execute_statements(
        self, cursor, statements: list[Union[str, Statement]]
    ) -> None:
        """Execute prefix statements and main statements.

        Prefix statements (session setup) run first on every invocation, then
        the given statements, all on the same cursor/transaction.
        """
        # Execute prefix statements
        for stmt in self.prefix_statements:
            if isinstance(stmt, Statement):
                stmt.execute(cursor)
            else:
                cursor.execute(stmt)

        # Execute main statements
        for statement in statements:
            if isinstance(statement, Statement):
                statement.execute(cursor)
            else:
                cursor.execute(statement)

    def _execute_query(self, cursor, query: Union[str, Statement]) -> QueryResult:
        """Execute a query and return formatted results.

        Returns an empty QueryResult when the cursor reports no result set
        (cursor.description is None, e.g. for non-SELECT commands).
        """
        if isinstance(query, Statement):
            query.execute(cursor)
        else:
            cursor.execute(query)

        if cursor.description:
            field_names = [desc.name for desc in cursor.description]
            results = cursor.fetchall()
            # Normalize row type: psycopg may return row objects; store plain tuples.
            data = [tuple(row) for row in results]
            return QueryResult(data, field_names)
        else:
            return QueryResult([], [])

    def execute_with_fallback(
        self,
        pre_aggregate_manager: PreAggregateManager,
        pre_agg_query_builder: Callable[[], str],
        raw_query_builder: Callable[[], str],
        interval: Optional[str] = None,
        agg_funcs: Optional[list[str]] = None,
        column_mapping_rules: Optional[dict[str, list[str]]] = None,
        debug_name: str = "query",
    ) -> pd.DataFrame:
        """
        Execute a query with pre-aggregate optimization and automatic fallback.

        This method encapsulates the common pattern of trying pre-aggregate queries first,
        then falling back to raw data queries if the pre-aggregate fails.

        :param pre_aggregate_manager: Manager for pre-aggregate operations
        :param pre_agg_query_builder: Function that returns pre-aggregate query string
        :param raw_query_builder: Function that returns raw data query string
        :param interval: Time interval for aggregation
        :param agg_funcs: List of aggregation functions
        :param column_mapping_rules: Rules for mapping column names in pre-aggregate results
        :param debug_name: Name for debugging/logging purposes
        :return: DataFrame with query results
        """
        # Import locally to avoid circular dependency
        from mlrun.model_monitoring.db.tsdb.timescaledb.utils.timescaledb_dataframe_processor import (
            TimescaleDBDataFrameProcessor,
        )

        df_processor = TimescaleDBDataFrameProcessor()

        if pre_aggregate_manager.can_use_pre_aggregates(
            interval=interval, agg_funcs=agg_funcs
        ):
            try:
                # Try pre-aggregate query first
                query = pre_agg_query_builder()
                result = self.run(query=query)
                df = df_processor.from_query_result(result)

                if not df.empty and column_mapping_rules:
                    # Apply flexible column mapping for pre-aggregate results
                    mapping = df_processor.build_flexible_column_mapping(
                        df, column_mapping_rules
                    )
                    df = df_processor.apply_column_mapping(df, mapping)

                return df

            except Exception as e:
                # Any pre-aggregate failure falls through to the raw query below.
                logger.warning(
                    f"Pre-aggregate {debug_name} query failed, falling back to raw data",
                    error=mlrun.errors.err_to_str(e),
                )

        # Fallback to raw data query
        raw_query = raw_query_builder()
        result = self.run(query=raw_query)
        return df_processor.from_query_result(result)

    def _execute_with_retry(
        self,
        cursor_operation_callable: Callable[
            [psycopg.Cursor[Any]], Optional[QueryResult]
        ],
        operation_name: str,
    ) -> Optional[QueryResult]:
        """
        Generic retry wrapper for database operations.

        PostgreSQL Error Handling Strategy Matrix (Currently Implemented):

        | Category                    |Retry?| Timing           | Reason                           |
        |-----------------------------|------|------------------|----------------------------------|
        | DeadlockDetected            | Yes  | 0.1s, 0.2s, 0.4s | Auto-rollback, fast resolution   |
        | Other OperationalError      | Yes  | 1s, 2s, 4s       | Network/server recovery time     |
        | InterfaceError              | Yes  | 1s, 2s, 4s       | Client connection issues         |
        | All Other psycopg.Error     | No   | -                | Pass through without wrapping    |

        Note: PostgreSQL automatically rolls back failed transactions, so explicit
        rollback is only needed for DeadlockDetected where we retry the operation.

        Note: Unhandled errors are passed through without wrapping to preserve
        original exception types and stack traces for proper debugging.

        :param cursor_operation_callable: Function that takes a cursor and executes the operation
        :param operation_name: Name for logging (e.g., "statements", "query")
        :return: Result of cursor_operation_callable()
        """
        # Separate counters: deadlocks and connection errors have independent
        # retry budgets and backoff schedules.
        deadlock_attempts = 0
        connection_attempts = 0

        while True:
            try:
                # Execute operation within a transaction
                with self.pool.connection() as conn:
                    conn.autocommit = self._autocommit
                    with conn.cursor() as cursor:
                        result = cursor_operation_callable(cursor)
                        if not self._autocommit:
                            conn.commit()
                        return result
            except (psycopg.OperationalError, psycopg.InterfaceError) as e:
                # Different retry limits and timing based on error type
                if isinstance(e, psycopg.errors.DeadlockDetected):
                    if deadlock_attempts >= self.MAX_DEADLOCK_RETRIES:
                        raise mlrun.errors.MLRunRuntimeError(
                            f"Database {operation_name} failed: deadlock persisted "
                            f"after {self.MAX_DEADLOCK_RETRIES} retries: {e}"
                        ) from e
                    # Fast retry for deadlocks: ~0.1s, ~0.2s, ~0.4s with jitter
                    delay = (2**deadlock_attempts) * 0.1 + random.uniform(0, 0.05)
                    error_type = "deadlock"
                    deadlock_attempts += 1
                else:
                    if connection_attempts >= self._max_retries:
                        raise mlrun.errors.MLRunRuntimeError(
                            f"Database {operation_name} failed after "
                            f"{self._max_retries} connection retries: {e}"
                        ) from e
                    # Slower retry for connection issues: 1s, 2s, 4s
                    delay = self._retry_delay * (2**connection_attempts)
                    error_type = "connection"
                    connection_attempts += 1

                logger.warning(
                    f"TimescaleDB {error_type} error in {operation_name}, retrying",
                    attempt=deadlock_attempts
                    if error_type == "deadlock"
                    else connection_attempts,
                    max_retries=self.MAX_DEADLOCK_RETRIES
                    if error_type == "deadlock"
                    else self._max_retries,
                    delay=delay,
                    error=mlrun.errors.err_to_str(e),
                )
                time.sleep(delay)