mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +3 -2
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +7 -0
- mlrun/common/model_monitoring/helpers.py +41 -4
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +13 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +10 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +24 -0
- mlrun/common/schemas/hub.py +3 -2
- mlrun/common/schemas/model_monitoring/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/constants.py +2 -2
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +53 -15
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +2 -3
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/datastore_profile.py +49 -17
- mlrun/datastore/model_provider/huggingface_provider.py +6 -2
- mlrun/datastore/model_provider/model_provider.py +2 -2
- mlrun/datastore/model_provider/openai_provider.py +2 -2
- mlrun/datastore/s3.py +15 -16
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +16 -10
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +16 -3
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +36 -12
- mlrun/db/httpdb.py +316 -101
- mlrun/db/nopdb.py +29 -11
- mlrun/errors.py +4 -2
- mlrun/execution.py +11 -12
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +37 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +67 -76
- mlrun/hub/step.py +113 -0
- mlrun/launcher/base.py +2 -1
- mlrun/launcher/local.py +2 -1
- mlrun/model.py +12 -2
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +20 -6
- mlrun/model_monitoring/applications/context.py +1 -0
- mlrun/model_monitoring/controller.py +7 -17
- mlrun/model_monitoring/db/_schedules.py +2 -16
- mlrun/model_monitoring/db/_stats.py +2 -13
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +2 -4
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +2 -1
- mlrun/model_monitoring/stream_processing.py +18 -16
- mlrun/model_monitoring/writer.py +4 -3
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -44
- mlrun/platforms/iguazio.py +1 -1
- mlrun/projects/operations.py +11 -10
- mlrun/projects/project.py +81 -82
- mlrun/run.py +4 -7
- mlrun/runtimes/__init__.py +2 -204
- mlrun/runtimes/base.py +89 -21
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +4 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/mounts.py +5 -0
- mlrun/runtimes/nuclio/__init__.py +12 -8
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +200 -32
- mlrun/runtimes/nuclio/function.py +154 -49
- mlrun/runtimes/nuclio/serving.py +55 -42
- mlrun/runtimes/pod.py +59 -10
- mlrun/secrets.py +46 -2
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +5 -5
- mlrun/serving/routers.py +3 -3
- mlrun/serving/server.py +46 -43
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +554 -207
- mlrun/serving/steps.py +1 -1
- mlrun/serving/system_steps.py +42 -33
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +89 -16
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/git.py +1 -1
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py
@@ -0,0 +1,541 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+from typing import Optional
+
+import pandas as pd
+
+import mlrun
+import mlrun.common.model_monitoring.helpers
+import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_schema as timescaledb_schema
+from mlrun.config import config
+from mlrun.datastore.datastore_profile import DatastoreProfilePostgreSQL
+from mlrun.model_monitoring.db import TSDBConnector
+from mlrun.model_monitoring.db.tsdb.preaggregate import (
+    PreAggregateConfig,
+    PreAggregateManager,
+)
+from mlrun.model_monitoring.db.tsdb.timescaledb.queries.timescaledb_metrics_queries import (
+    TimescaleDBMetricsQueries,
+)
+from mlrun.model_monitoring.db.tsdb.timescaledb.queries.timescaledb_predictions_queries import (
+    TimescaleDBPredictionsQueries,
+)
+from mlrun.model_monitoring.db.tsdb.timescaledb.queries.timescaledb_results_queries import (
+    TimescaleDBResultsQueries,
+)
+from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_connection import (
+    Statement,
+    TimescaleDBConnection,
+)
+from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_operations import (
+    TimescaleDBOperationsManager,
+)
+from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_stream import (
+    TimescaleDBStreamProcessor,
+)
+from mlrun.utils import logger
+
+
+class TimescaleDBConnector(TSDBConnector):
+    """
+    Complete TimescaleDB TSDB connector using composition pattern.
+
+    Uses composition for all specialized functionality:
+    - TimescaleDBMetricsQueries, TimescaleDBPredictionsQueries, TimescaleDBResultsQueries: Direct query operations
+    - TimescaleDBOperationsManager: Table management and write operations
+    - TimescaleDBStreamProcessor: Stream processing operations
+
+    Database naming (controlled by mlrun.mlconf.model_endpoint_monitoring.tsdb.auto_create_database):
+    - When auto_create_database=True (default): generates database name using system_id: 'mlrun_mm_{system_id}'
+    - When auto_create_database=False: uses the database from the profile/connection string as-is
+    """
+
+    type: str = mm_schemas.TSDBTarget.TimescaleDB
+
+    def __init__(
+        self,
+        project: str,
+        profile: DatastoreProfilePostgreSQL,
+        pre_aggregate_config: Optional[PreAggregateConfig] = None,
+        **kwargs,
+    ):
+        super().__init__(project=project)
+
+        self.profile = profile
+
+        # Determine the monitoring database name
+        self.database = self._determine_database_name(profile)
+
+        # Update profile to use the determined database name
+        # This ensures the connection uses the correct database
+        if profile.database != self.database:
+            logger.info(
+                "Auto-generated database name for TimescaleDB",
+                original_database=profile.database,
+                database=self.database,
+            )
+            # Create a new profile with the generated database
+            profile = DatastoreProfilePostgreSQL(
+                name=profile.name,
+                user=profile.user,
+                password=profile.password,
+                host=profile.host,
+                port=profile.port,
+                database=self.database,
+            )
+            self.profile = profile
+
+        # Create shared connection
+        self._connection = TimescaleDBConnection(
+            dsn=profile.dsn(),
+            min_connections=kwargs.get("min_connections", 1),
+            max_connections=kwargs.get("max_connections", 10),
+            max_retries=kwargs.get("max_retries", 3),
+            retry_delay=kwargs.get("retry_delay", 1.0),
+            autocommit=kwargs.get("autocommit", False),
+        )
+
+        # Create shared components needed by query classes
+        self._tables = timescaledb_schema.create_table_schemas(project)
+        self._pre_aggregate_manager = PreAggregateManager(pre_aggregate_config)
+
+        # Create specialized query handlers with proper initialization
+        self._metrics_queries = TimescaleDBMetricsQueries(
+            project=project,
+            connection=self._connection,
+            pre_aggregate_manager=self._pre_aggregate_manager,
+            tables=self._tables,
+        )
+        self._predictions_queries = TimescaleDBPredictionsQueries(
+            project=project,
+            connection=self._connection,
+            pre_aggregate_manager=self._pre_aggregate_manager,
+            tables=self._tables,
+        )
+        self._results_queries = TimescaleDBResultsQueries(
+            connection=self._connection,
+            project=project,
+            pre_aggregate_manager=self._pre_aggregate_manager,
+            tables=self._tables,
+        )
+
+        # Create operations and stream handlers
+        self._operations = TimescaleDBOperationsManager(
+            project=project,
+            connection=self._connection,
+            pre_aggregate_config=pre_aggregate_config,
+            profile=profile,
+        )
+
+        self._stream = TimescaleDBStreamProcessor(
+            project=project, profile=profile, connection=self._connection
+        )
+
+        self._pre_aggregate_config = pre_aggregate_config
+
+    def _determine_database_name(self, profile: DatastoreProfilePostgreSQL) -> str:
+        """
+        Determine the database name to use.
+
+        Delegates to the shared helper function to ensure consistent database naming
+        across all TimescaleDB components (connector, stream, storey targets).
+
+        :param profile: The PostgreSQL profile
+        :return: The database name to use
+        """
+        return mlrun.common.model_monitoring.helpers.get_tsdb_database_name(
+            profile.database
+        )
+
+    # Delegate operations methods
+    def create_tables(self, *args, **kwargs) -> None:
+        return self._operations.create_tables(*args, **kwargs)
+
+    def write_application_event(self, *args, **kwargs) -> None:
+        return self._operations.write_application_event(*args, **kwargs)
+
+    def delete_tsdb_records(self, *args, **kwargs) -> None:
+        return self._operations.delete_tsdb_records(*args, **kwargs)
+
+    def delete_tsdb_resources(self, *args, **kwargs) -> None:
+        return self._operations.delete_tsdb_resources(*args, **kwargs)
+
+    def delete_application_records(self, *args, **kwargs) -> None:
+        return self._operations.delete_application_records(*args, **kwargs)
+
+    def read_metrics_data(
+        self,
+        *,
+        endpoint_id: str,
+        start: datetime.datetime,
+        end: datetime.datetime,
+        metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
+        type: str,
+        with_result_extra_data: bool = False,
+    ):
+        """Read metrics or results data from TimescaleDB (cross-cutting coordination)."""
+
+        if type == "metrics":
+            df = self._metrics_queries.read_metrics_data_impl(
+                endpoint_id=endpoint_id,
+                start=start,
+                end=end,
+                metrics=metrics,
+            )
+            # Use inherited method to convert DataFrame to domain objects
+            return self.df_to_metrics_values(
+                df=df, metrics=metrics, project=self.project
+            )
+
+        else:  # results
+            df = self._results_queries.read_results_data_impl(
+                endpoint_id=endpoint_id,
+                start=start,
+                end=end,
+                metrics=metrics,
+                with_result_extra_data=with_result_extra_data,
+            )
+            # Use inherited method to convert DataFrame to domain objects
+            return self.df_to_results_values(
+                df=df, metrics=metrics, project=self.project
+            )
+
+    def get_model_endpoint_real_time_metrics(self, *args, **kwargs):
+        return self._metrics_queries.get_model_endpoint_real_time_metrics(
+            *args, **kwargs
+        )
+
+    def get_metrics_metadata(self, *args, **kwargs):
+        return self._metrics_queries.get_metrics_metadata(*args, **kwargs)
+
+    def add_basic_metrics(
+        self,
+        model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
+        metric_list: Optional[list[str]] = None,
+    ) -> list[mlrun.common.schemas.ModelEndpoint]:
+        """
+        Add basic metrics to the model endpoint object using TimescaleDB optimizations.
+
+        :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
+            be filled with the relevant basic metrics.
+        :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
+
+        :return: A list of enriched `ModelEndpoint` objects.
+        """
+        uids = [mep.metadata.uid for mep in model_endpoint_objects]
+
+        # Access methods directly from the respective query classes
+        # Note: last_request is handled separately due to potential data synchronization issues
+        metric_name_to_function = {
+            mm_schemas.EventFieldType.ERROR_COUNT: self._results_queries.get_error_count,
+            mm_schemas.ModelEndpointSchema.AVG_LATENCY: self._predictions_queries.get_avg_latency,
+            mm_schemas.ResultData.RESULT_STATUS: self._results_queries.get_drift_status,
+        }
+
+        if metric_list is not None:
+            for metric_name in list(metric_name_to_function):
+                if metric_name not in metric_list:
+                    del metric_name_to_function[metric_name]
+
+        metric_name_to_df = {
+            metric_name: function(endpoint_ids=uids)
+            for metric_name, function in metric_name_to_function.items()
+        }
+
+        def add_metrics(
+            mep: mlrun.common.schemas.ModelEndpoint,
+            df_dictionary: dict[str, pd.DataFrame],
+        ):
+            for metric in df_dictionary:
+                df = df_dictionary.get(metric, pd.DataFrame())
+                if not df.empty:
+                    line = df[
+                        df[mm_schemas.WriterEvent.ENDPOINT_ID] == mep.metadata.uid
+                    ]
+                    if not line.empty and metric in line:
+                        value = line[metric].item()
+                        if isinstance(value, pd.Timestamp):
+                            value = value.to_pydatetime()
+                        setattr(mep.status, metric, value)
+
+            return mep
+
+        enriched_endpoints = list(
+            map(
+                lambda mep: add_metrics(
+                    mep=mep,
+                    df_dictionary=metric_name_to_df,
+                ),
+                model_endpoint_objects,
+            )
+        )
+
+        # Handle last_request separately with special enrichment
+        if metric_list is None or "last_request" in metric_list:
+            self._enrich_mep_with_last_request(
+                model_endpoint_objects={
+                    mep.metadata.uid: mep for mep in enriched_endpoints
+                }
+            )
+
+        return enriched_endpoints
+
+    def _enrich_mep_with_last_request(
+        self,
+        model_endpoint_objects: dict[str, mlrun.common.schemas.ModelEndpoint],
+    ):
+        """
+        Enrich model endpoint objects with last_request data from predictions table.
+        This method handles the special case of last_request which may have timing issues.
+        """
+        try:
+            last_request_df = self._predictions_queries.get_last_request(
+                endpoint_ids=list(model_endpoint_objects.keys())
+            )
+
+            if not last_request_df.empty:
+                for _, row in last_request_df.iterrows():
+                    endpoint_id = row.get(mm_schemas.WriterEvent.ENDPOINT_ID)
+                    last_request = row.get("last_request")
+
+                    if (
+                        endpoint_id in model_endpoint_objects
+                        and last_request is not None
+                    ):
+                        if isinstance(last_request, pd.Timestamp):
+                            last_request = last_request.to_pydatetime()
+                        model_endpoint_objects[
+                            endpoint_id
+                        ].status.last_request = last_request
+        except Exception as e:
+            # Log but don't fail - last_request is not critical for basic functionality
+            logger.warning(
+                "Failed to enrich model endpoints with last_request data",
+                error=mlrun.errors.err_to_str(e),
+                endpoint_count=len(model_endpoint_objects),
+            )
+
+    def read_predictions(self, *args, **kwargs):
+        return self._predictions_queries.read_predictions(*args, **kwargs)
+
+    def _get_records(
+        self,
+        table: str,
+        start: datetime.datetime,
+        end: datetime.datetime,
+        endpoint_id: Optional[str] = None,
+        columns: Optional[list[str]] = None,
+        timestamp_column: Optional[str] = None,
+    ) -> pd.DataFrame:
+        """
+        Get raw records from TimescaleDB as pandas DataFrame.
+
+        This method provides direct access to raw table data.
+
+        :param table: Table name - use TimescaleDBTables enum (METRICS, APP_RESULTS, or PREDICTIONS)
+        :param start: Start time for the query
+        :param end: End time for the query
+        :param endpoint_id: Optional endpoint ID filter (None = all endpoints)
+        :param columns: Optional list of specific columns to return (None = all columns)
+        :param timestamp_column: Optional timestamp column to use for time filtering (None = use table's default)
+        :return: Raw pandas DataFrame with all matching records
+        """
+        if table == mm_schemas.TimescaleDBTables.METRICS:
+            df = self._metrics_queries.read_metrics_data_impl(
+                endpoint_id=endpoint_id,
+                start=start,
+                end=end,
+                metrics=None,  # Get all metrics
+                timestamp_column=timestamp_column,
+            )
+        elif table == mm_schemas.TimescaleDBTables.APP_RESULTS:
+            df = self._results_queries.read_results_data_impl(
+                endpoint_id=endpoint_id,
+                start=start,
+                end=end,
+                metrics=None,  # Get all results
+                with_result_extra_data=True,
+                timestamp_column=timestamp_column,
+            )
+        elif table == mm_schemas.TimescaleDBTables.PREDICTIONS:
+            df = self._predictions_queries.read_predictions_impl(
+                endpoint_id=endpoint_id,
+                start=start,
+                end=end,
+                columns=columns,
+                timestamp_column=timestamp_column,
+            )
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Invalid table '{table}'. Must be METRICS, APP_RESULTS, or PREDICTIONS from TimescaleDBTables enum"
+            )
+
+        if columns is not None and not df.empty:
+            # Filter to requested columns if specified
+            available_columns = [col for col in columns if col in df.columns]
+            df = df[available_columns]
+
+        return df
+
+    def get_last_request(self, *args, **kwargs):
+        return self._predictions_queries.get_last_request(*args, **kwargs)
+
+    def get_avg_latency(self, *args, **kwargs):
+        return self._predictions_queries.get_avg_latency(*args, **kwargs)
+
+    def count_processed_model_endpoints(
+        self,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+        application_names: Optional[list[str] | str] = None,
+    ) -> dict[str, int]:
+        """
+        Count unique endpoints per application from METRICS and APP_RESULTS tables.
+
+        Uses SQL UNION to efficiently count endpoints that have data in EITHER table.
+
+        :param start: Start time for the query (default: last 24 hours)
+        :param end: End time for the query (default: current time)
+        :param application_names: Filter by specific application names
+        :return: Dictionary mapping application_name to endpoint count
+        """
+        # Set default time range
+        start = start or (mlrun.utils.datetime_now() - datetime.timedelta(hours=24))
+        start, end = self._pre_aggregate_manager.get_start_end(start, end)
+
+        metrics_table = self._tables[mm_schemas.TimescaleDBTables.METRICS]
+        app_results_table = self._tables[mm_schemas.TimescaleDBTables.APP_RESULTS]
+        time_column = mm_schemas.WriterEvent.END_INFER_TIME
+        app_column = mm_schemas.WriterEvent.APPLICATION_NAME
+        endpoint_column = mm_schemas.WriterEvent.ENDPOINT_ID
+
+        # Build application filter and params
+        app_filter_metrics = ""
+        app_filter_results = ""
+
+        if application_names:
+            if isinstance(application_names, str):
+                application_names = [application_names]
+            app_names_list = list(application_names)
+            app_placeholders = ", ".join(["%s"] * len(app_names_list))
+            app_filter_metrics = f"AND {app_column} IN ({app_placeholders})"
+            app_filter_results = f"AND {app_column} IN ({app_placeholders})"
+            # Params: metrics (start, end, apps), app_results (start, end, apps)
+            params = [start, end] + app_names_list + [start, end] + app_names_list
+        else:
+            params = [start, end, start, end]
+
+        # Use UNION to combine endpoints from both METRICS and APP_RESULTS tables
+        query_sql = f"""
+            SELECT {app_column}, COUNT(DISTINCT {endpoint_column}) as endpoint_count
+            FROM (
+                SELECT DISTINCT {app_column}, {endpoint_column}
+                FROM {metrics_table.full_name()}
+                WHERE {time_column} >= %s AND {time_column} <= %s
+                {app_filter_metrics}
+
+                UNION
+
+                SELECT DISTINCT {app_column}, {endpoint_column}
+                FROM {app_results_table.full_name()}
+                WHERE {time_column} >= %s AND {time_column} <= %s
+                {app_filter_results}
+            ) combined
+            GROUP BY {app_column}
+        """
+
+        stmt = Statement(query_sql, params)
+        result = self._connection.run(query=stmt)
+
+        if not result or not result.data:
+            return {}
+
+        # Convert result to dict: {application_name: count}
+        return {row[0]: row[1] for row in result.data}
+
+    def get_drift_status(self, *args, **kwargs):
+        return self._results_queries.get_drift_status(*args, **kwargs)
+
+    def get_results_metadata(self, *args, **kwargs):
+        return self._results_queries.get_results_metadata(*args, **kwargs)
+
+    def get_error_count(self, *args, **kwargs):
+        return self._results_queries.get_error_count(*args, **kwargs)
+
+    def count_results_by_status(self, *args, **kwargs):
+        return self._results_queries.count_results_by_status(*args, **kwargs)
+
+    def apply_monitoring_stream_steps(self, *args, **kwargs) -> None:
+        return self._stream.apply_monitoring_stream_steps(*args, **kwargs)
+
+    def handle_model_error(self, *args, **kwargs) -> None:
+        return self._stream.handle_model_error(*args, **kwargs)
+
+    def calculate_latest_metrics(self, *args, **kwargs):
+        return self._metrics_queries.calculate_latest_metrics(*args, **kwargs)
+
+    def get_drift_data(self, *args, **kwargs):
+        return self._results_queries.get_drift_data(*args, **kwargs)
+
+    def add_pre_writer_steps(self, graph, after):
+        return graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.timescaledb.writer_graph_steps.ProcessBeforeTimescaleDBWriter",
+            name="ProcessBeforeTimescaleDBWriter",
+            after=after,
+        )
+
+    def apply_writer_steps(self, graph, after, **kwargs) -> None:
+        tables = timescaledb_schema.create_table_schemas(self.project)
+
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TimescaleDBStoreyTarget",
+            name="tsdb_metrics",
+            after=after,
+            url=f"ds://{self.profile.name}",
+            table=tables[mm_schemas.TimescaleDBTables.METRICS].full_name(),
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            columns=[
+                mm_schemas.WriterEvent.START_INFER_TIME,
+                mm_schemas.MetricData.METRIC_VALUE,
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+        )
+
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TimescaleDBStoreyTarget",
+            name="tsdb_app_results",
+            after=after,
+            url=f"ds://{self.profile.name}",
+            table=tables[mm_schemas.TimescaleDBTables.APP_RESULTS].full_name(),
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            columns=[
+                mm_schemas.WriterEvent.START_INFER_TIME,
+                mm_schemas.ResultData.RESULT_VALUE,
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.ResultData.RESULT_EXTRA_DATA,
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+        )
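
For context, a minimal usage sketch of the new connector, based only on the signatures visible in this diff (not taken from the package docs): the profile values are placeholders, and it assumes a reachable TimescaleDB/PostgreSQL instance with the default auto_create_database behavior.

import datetime

from mlrun.datastore.datastore_profile import DatastoreProfilePostgreSQL
from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_connector import (
    TimescaleDBConnector,
)

# Hypothetical connection details - replace with real values.
profile = DatastoreProfilePostgreSQL(
    name="my-timescaledb",
    user="postgres",
    password="<password>",
    host="localhost",
    port=5432,
    database="postgres",  # with auto_create_database=True this is replaced by 'mlrun_mm_{system_id}'
)

connector = TimescaleDBConnector(project="my-project", profile=profile)
connector.create_tables()

# Unique endpoints per application over the method's default window (last 24 hours).
counts = connector.count_processed_model_endpoints()
print(counts)  # e.g. {"my-app": 3}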