mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +24 -3
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +14 -0
- mlrun/common/model_monitoring/helpers.py +123 -0
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +14 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +12 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +34 -0
- mlrun/common/schemas/hub.py +33 -20
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +12 -15
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +69 -19
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +12 -5
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +7 -2
- mlrun/datastore/datastore_profile.py +84 -22
- mlrun/datastore/model_provider/huggingface_provider.py +225 -41
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +206 -74
- mlrun/datastore/model_provider/openai_provider.py +226 -66
- mlrun/datastore/s3.py +39 -18
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +17 -12
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +25 -6
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +63 -32
- mlrun/db/httpdb.py +373 -153
- mlrun/db/nopdb.py +54 -21
- mlrun/errors.py +4 -2
- mlrun/execution.py +66 -25
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +52 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +172 -0
- mlrun/hub/step.py +113 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +15 -7
- mlrun/launcher/local.py +4 -1
- mlrun/model.py +14 -4
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +65 -28
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +299 -128
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/controller.py +132 -58
- mlrun/model_monitoring/db/_schedules.py +38 -29
- mlrun/model_monitoring/db/_stats.py +6 -16
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +30 -6
- mlrun/model_monitoring/stream_processing.py +34 -28
- mlrun/model_monitoring/writer.py +224 -4
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -43
- mlrun/platforms/iguazio.py +8 -4
- mlrun/projects/operations.py +17 -11
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +187 -123
- mlrun/run.py +95 -21
- mlrun/runtimes/__init__.py +2 -186
- mlrun/runtimes/base.py +103 -25
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +5 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +12 -7
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +339 -40
- mlrun/runtimes/nuclio/function.py +222 -72
- mlrun/runtimes/nuclio/serving.py +132 -42
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +99 -14
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +84 -11
- mlrun/serving/routers.py +26 -44
- mlrun/serving/server.py +138 -51
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +997 -283
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +149 -95
- mlrun/serving/v2_serving.py +9 -10
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +292 -94
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +3 -5
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +3 -3
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
- mlrun/api/schemas/__init__.py +0 -259
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,590 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from datetime import datetime, timedelta
|
|
16
|
+
from typing import Optional, Union
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
import v3io_frames.client
|
|
20
|
+
|
|
21
|
+
import mlrun
|
|
22
|
+
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
23
|
+
import mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_schema as timescaledb_schema
|
|
24
|
+
import mlrun.utils
|
|
25
|
+
from mlrun.model_monitoring.db.tsdb.timescaledb.utils.timescaledb_dataframe_processor import (
|
|
26
|
+
TimescaleDBDataFrameProcessor,
|
|
27
|
+
)
|
|
28
|
+
from mlrun.model_monitoring.db.tsdb.timescaledb.utils.timescaledb_query_builder import (
|
|
29
|
+
TimescaleDBQueryBuilder,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TimescaleDBResultsQueries:
    """
    Query class containing results and drift-related query methods for TimescaleDB.

    Can be used as a mixin or standalone instance with proper initialization.
    """

    def __init__(
        self,
        connection,  # Required parameter
        project: Optional[str] = None,
        pre_aggregate_manager=None,
        tables: Optional[dict] = None,
    ):
        """
        Initialize TimescaleDB results query handler.

        :param connection:            TimescaleDB connection instance (required)
        :param project:               Project name
        :param pre_aggregate_manager: PreAggregateManager instance
        :param tables:                Dictionary of table schemas
        """
        # Infrastructure handles are kept private; project/tables are public.
        self._connection = connection
        self._pre_aggregate_manager = pre_aggregate_manager
        self.project = project
        self.tables = tables
|
|
59
|
+
|
|
60
|
+
def get_drift_status(
    self,
    endpoint_ids: Union[str, list[str]],
    start: Optional[datetime] = None,
    end: Optional[datetime] = None,
    get_raw: bool = False,
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
    """Get drift status for specified endpoints.

    Computes the maximal result status per endpoint over the time window,
    preferring pre-aggregated data with a raw-table fallback.

    :param endpoint_ids: Endpoint ID(s) to get drift status for
    :param start: Start datetime for filtering (defaults to the last 24 hours)
    :param end: End datetime for filtering
    :param get_raw: If True, return raw frame data (not implemented)
    :return: DataFrame with drift status data
    """
    del get_raw  # Suppress unused variable warning (not implemented)

    agg_func = "max"  # Default aggregation function

    if isinstance(endpoint_ids, str):
        endpoint_ids = [endpoint_ids]

    # Set default start time and prepare time range with auto-determined interval
    start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
    start, end, interval = TimescaleDBQueryBuilder.prepare_time_range_and_interval(
        self._pre_aggregate_manager, start, end
    )

    table_schema = self.tables[mm_schemas.TimescaleDBTables.APP_RESULTS]
    filter_query = TimescaleDBQueryBuilder.build_endpoint_filter(endpoint_ids)

    def build_pre_agg_query():
        # Calculate overall MAX in SQL across all time buckets
        # Use subquery to get time-bucketed data, then MAX over those results
        subquery = table_schema._get_records_query(
            start=start,
            end=end,
            columns_to_filter=[
                timescaledb_schema.TIME_BUCKET_COLUMN,
                f"{agg_func}_{mm_schemas.ResultData.RESULT_STATUS}",
                mm_schemas.WriterEvent.ENDPOINT_ID,
                mm_schemas.WriterEvent.APPLICATION_NAME,
                mm_schemas.ResultData.RESULT_NAME,
            ],
            filter_query=filter_query,
            agg_funcs=[agg_func],
            interval=interval,
            use_pre_aggregates=True,
        )

        # Use helper to build endpoint aggregation query
        return TimescaleDBQueryBuilder.build_endpoint_aggregation_query(
            subquery=subquery,
            aggregation_columns={
                mm_schemas.ResultData.RESULT_STATUS: f"MAX({agg_func}_{mm_schemas.ResultData.RESULT_STATUS})",
                mm_schemas.WriterEvent.APPLICATION_NAME: f"MAX({mm_schemas.WriterEvent.APPLICATION_NAME})",
                mm_schemas.ResultData.RESULT_NAME: f"MAX({mm_schemas.ResultData.RESULT_NAME})",
            },
        )

    def build_raw_query():
        columns = [
            f"{mm_schemas.WriterEvent.ENDPOINT_ID} AS {mm_schemas.WriterEvent.ENDPOINT_ID}",
            f"MAX({mm_schemas.ResultData.RESULT_STATUS}) as {mm_schemas.ResultData.RESULT_STATUS}",
        ]
        group_by_columns = [mm_schemas.WriterEvent.ENDPOINT_ID]

        # Build filter using query builder utilities
        filters = [
            filter_query,
            f"{mm_schemas.ResultData.RESULT_STATUS} IS NOT NULL",
        ]
        enhanced_filter_query = TimescaleDBQueryBuilder.combine_filters(filters)

        return table_schema._get_records_query(
            start=start,
            end=end,
            columns_to_filter=columns,
            filter_query=enhanced_filter_query,
            group_by=group_by_columns,
            order_by=mm_schemas.WriterEvent.ENDPOINT_ID,
        )

    # Column mapping rules for pre-aggregate results.
    # Each candidate column name is listed once, in lookup order (previous
    # revision duplicated the first candidate of result_status and the
    # application_name candidate, which was redundant).
    column_mapping_rules = {
        mm_schemas.ResultData.RESULT_STATUS: [
            f"{agg_func}_{mm_schemas.ResultData.RESULT_STATUS}",
            mm_schemas.ResultData.RESULT_STATUS,
        ],
        mm_schemas.WriterEvent.ENDPOINT_ID: [mm_schemas.WriterEvent.ENDPOINT_ID],
        mm_schemas.WriterEvent.APPLICATION_NAME: [
            mm_schemas.WriterEvent.APPLICATION_NAME,
        ],
        mm_schemas.ResultData.RESULT_NAME: [mm_schemas.ResultData.RESULT_NAME],
    }

    return self._connection.execute_with_fallback(
        self._pre_aggregate_manager,
        build_pre_agg_query,
        build_raw_query,
        interval=interval,
        agg_funcs=[agg_func],
        column_mapping_rules=column_mapping_rules,
        debug_name="drift_status",
    )
|
|
167
|
+
|
|
168
|
+
def get_error_count(
    self,
    endpoint_ids: Union[str, list[str]],
    start: Optional[datetime] = None,
    end: Optional[datetime] = None,
) -> pd.DataFrame:
    """Get error count with optional pre-aggregate optimization."""
    # Normalize a single endpoint id into a list.
    endpoint_ids = [endpoint_ids] if isinstance(endpoint_ids, str) else endpoint_ids

    # Default window is the last 24 hours; the builder aligns the range
    # and auto-determines the aggregation interval.
    if start is None:
        start = mlrun.utils.datetime_now() - timedelta(hours=24)
    start, end, interval = TimescaleDBQueryBuilder.prepare_time_range_and_interval(
        self._pre_aggregate_manager, start, end
    )

    errors_schema = self.tables[mm_schemas.TimescaleDBTables.ERRORS]
    endpoint_filter = TimescaleDBQueryBuilder.build_endpoint_filter(endpoint_ids)

    def _pre_agg_query():
        # Total error count in SQL across all time buckets: an inner query
        # yields bucketed counts, the wrapper sums them per endpoint.
        # NOTE(review): the pre-agg path counts the model_error column while
        # the raw path filters error_type = infer_error — confirm both paths
        # are meant to count the same population.
        inner = errors_schema._get_records_query(
            start=start,
            end=end,
            columns_to_filter=[
                timescaledb_schema.TIME_BUCKET_COLUMN,
                f"count_{mm_schemas.EventFieldType.MODEL_ERROR}",
                mm_schemas.WriterEvent.ENDPOINT_ID,
            ],
            filter_query=endpoint_filter,  # Only endpoint filter, no error_type
            agg_funcs=["count"],
            interval=interval,
            use_pre_aggregates=True,
        )
        return TimescaleDBQueryBuilder.build_endpoint_aggregation_query(
            subquery=inner,
            aggregation_columns={
                mm_schemas.EventFieldType.ERROR_COUNT: f"SUM(count_{mm_schemas.EventFieldType.MODEL_ERROR})"
            },
        )

    def _raw_query():
        # Raw path: count rows whose error type is an inference error.
        where_clause = TimescaleDBQueryBuilder.combine_filters(
            [
                endpoint_filter,
                f"{mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'",
            ]
        )
        return errors_schema._get_records_query(
            start=start,
            end=end,
            columns_to_filter=[
                f"{mm_schemas.WriterEvent.ENDPOINT_ID} AS {mm_schemas.WriterEvent.ENDPOINT_ID}",
                f"COUNT(*) AS {mm_schemas.EventFieldType.ERROR_COUNT}",
            ],
            filter_query=where_clause,
            group_by=[mm_schemas.WriterEvent.ENDPOINT_ID],
            order_by=mm_schemas.WriterEvent.ENDPOINT_ID,
        )

    # How pre-aggregate result columns map back to the expected names.
    mapping_rules = {
        mm_schemas.EventFieldType.ERROR_COUNT: [
            f"count_{mm_schemas.EventFieldType.MODEL_ERROR}",
            "count",
            mm_schemas.EventFieldType.ERROR_COUNT,
        ],
        mm_schemas.WriterEvent.ENDPOINT_ID: [mm_schemas.WriterEvent.ENDPOINT_ID],
    }

    return self._connection.execute_with_fallback(
        self._pre_aggregate_manager,
        _pre_agg_query,
        _raw_query,
        interval=interval,
        agg_funcs=["count"],
        column_mapping_rules=mapping_rules,
        debug_name="error_count",
    )
|
|
255
|
+
|
|
256
|
+
def get_results_metadata(
    self,
    endpoint_id: Union[str, list[str]],
    start: Optional[datetime] = None,
    end: Optional[datetime] = None,
    interval: Optional[str] = None,
) -> pd.DataFrame:
    """Get results metadata with optional pre-aggregate optimization."""
    # Resolve defaults and align the window to the aggregation interval.
    start, end = self._pre_aggregate_manager.get_start_end(start, end)
    start, end = self._pre_aggregate_manager.align_time_range(start, end, interval)

    schema = self.tables[mm_schemas.TimescaleDBTables.APP_RESULTS]
    metadata_columns = [
        mm_schemas.WriterEvent.APPLICATION_NAME,
        mm_schemas.ResultData.RESULT_NAME,
        mm_schemas.ResultData.RESULT_KIND,
        mm_schemas.WriterEvent.ENDPOINT_ID,
    ]

    records_query = schema._get_records_query(
        start=start,
        end=end,
        columns_to_filter=metadata_columns,
        filter_query=TimescaleDBQueryBuilder.build_endpoint_filter(endpoint_id),
    )

    frame = TimescaleDBDataFrameProcessor.from_query_result(
        self._connection.run(query=records_query)
    )

    # Keep only distinct metadata rows.
    return frame if frame.empty else frame.drop_duplicates()
|
|
293
|
+
|
|
294
|
+
def count_results_by_status(
    self,
    start: Optional[Union[datetime, str]] = None,
    end: Optional[Union[datetime, str]] = None,
    endpoint_ids: Optional[Union[str, list[str]]] = None,
    application_names: Optional[Union[str, list[str]]] = None,
    result_status_list: Optional[list[int]] = None,
) -> dict[tuple[str, int], int]:
    """
    Read results status from the TSDB and return a dictionary of results statuses by application name.

    :param start:              The start time in which to read the results. By default, the last 24 hours are read.
    :param end:                The end time in which to read the results. Default is the current time (now).
    :param endpoint_ids:       Optional list of endpoint ids to filter the results by. By default, all
                               endpoint ids are included.
    :param application_names:  Optional list of application names to filter the results by. By default, all
                               application are included.
    :param result_status_list: Optional list of result statuses to filter the results by. By default, all
                               result statuses are included.

    :return: A dictionary where the key is a tuple of (application_name, result_status) and the value is the total
             number of results with that status for that application.
             For example:
                {
                    ('app1', 1): 10,
                    ('app1', 2): 5
                }
    """
    # Set defaults
    now = mlrun.utils.datetime_now()
    start = start or (now - timedelta(hours=24))
    end = end or now

    # Convert string dates to datetime if needed
    if isinstance(start, str):
        start = datetime.fromisoformat(start)
    if isinstance(end, str):
        end = datetime.fromisoformat(end)

    table_schema = self.tables[mm_schemas.TimescaleDBTables.APP_RESULTS]

    # Build filter conditions
    filter_conditions = []

    if endpoint_ids:
        filter_conditions.append(
            TimescaleDBQueryBuilder.build_endpoint_filter(endpoint_ids)
        )

    if application_names:
        filter_conditions.append(
            TimescaleDBQueryBuilder.build_application_filter(application_names)
        )

    if result_status_list:
        # A single IN clause covers both one and many statuses; the previous
        # "=" special case for a single status was redundant.
        status_list = ", ".join(map(str, result_status_list))
        filter_conditions.append(
            f"{mm_schemas.ResultData.RESULT_STATUS} IN ({status_list})"
        )

    filter_query = TimescaleDBQueryBuilder.combine_filters(filter_conditions)

    # Build the aggregation query using the enhanced _get_records_query:
    # group by (application, status) and count matching rows.
    group_by_columns = [
        mm_schemas.WriterEvent.APPLICATION_NAME,
        mm_schemas.ResultData.RESULT_STATUS,
    ]
    columns = [*group_by_columns, "COUNT(*) AS count"]
    order_by_clause = f"{mm_schemas.WriterEvent.APPLICATION_NAME}, {mm_schemas.ResultData.RESULT_STATUS}"

    query = table_schema._get_records_query(
        start=start,
        end=end,
        columns_to_filter=columns,
        filter_query=filter_query,
        group_by=group_by_columns,
        order_by=order_by_clause,
    )

    result = self._connection.run(query=query)

    if not result or not result.data:
        return {}

    # Application names are lower-cased so lookups are case-insensitive.
    return {(row[0].lower(), row[1]): row[2] for row in result.data}
|
|
393
|
+
|
|
394
|
+
def get_drift_data(
    self,
    start: datetime,
    end: datetime,
    interval: Optional[str] = None,
) -> mm_schemas.ModelEndpointDriftValues:
    """
    Get drift data aggregated by time intervals, showing the count of suspected and detected drift events.

    This method queries the app_results table for drift-related statuses (potential_detection=1, detected=2)
    and aggregates them by time intervals, counting the maximum drift status per endpoint per interval.
    Uses pre-aggregate optimization when available.

    :param start:    Start time for the query
    :param end:      End time for the query
    :param interval: Optional time interval for aggregation (e.g., "1 hour", "30 minutes").
                     If not provided, will be automatically determined based on query duration.
    :return: ModelEndpointDriftValues containing time-binned drift counts
    """
    # Local import avoids a circular dependency at module load time.
    from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_connection import (
        Statement,
    )

    # Prepare time range and interval using helper
    start, end, interval = TimescaleDBQueryBuilder.prepare_time_range_and_interval(
        self._pre_aggregate_manager, start, end, interval
    )

    # Only drift-related statuses are counted.
    suspected_status = mm_schemas.ResultStatusApp.potential_detection.value  # 1
    detected_status = mm_schemas.ResultStatusApp.detected.value  # 2
    app_results_table = self.tables[mm_schemas.TimescaleDBTables.APP_RESULTS]

    # Use TimescaleDB's time_bucket function for interval aggregation:
    # first take the max status per (bucket, endpoint), then count endpoints
    # per (bucket, status).
    raw_query = f"""
        WITH drift_intervals AS (
            SELECT
                time_bucket('{interval}', {mm_schemas.WriterEvent.END_INFER_TIME}) AS bucket_start,
                {mm_schemas.WriterEvent.ENDPOINT_ID},
                MAX({mm_schemas.ResultData.RESULT_STATUS}) AS max_status
            FROM {app_results_table.full_name()}
            WHERE {mm_schemas.ResultData.RESULT_STATUS} IN (%s, %s)
                AND {mm_schemas.WriterEvent.END_INFER_TIME} >= %s
                AND {mm_schemas.WriterEvent.END_INFER_TIME} <= %s
            GROUP BY bucket_start, {mm_schemas.WriterEvent.ENDPOINT_ID}
        )
        SELECT
            bucket_start,
            max_status,
            COUNT(*) AS status_count
        FROM drift_intervals
        GROUP BY bucket_start, max_status
        ORDER BY bucket_start, max_status
        """

    statement = Statement(raw_query, (suspected_status, detected_status, start, end))
    query_result = self._connection.run(query=statement)

    if not query_result or not query_result.data:
        return mm_schemas.ModelEndpointDriftValues(values=[])

    # Convert to a DataFrame and reuse the shared conversion helper.
    frame = TimescaleDBDataFrameProcessor.from_query_result(query_result)
    return self._df_to_drift_data(frame)
|
|
463
|
+
|
|
464
|
+
def _df_to_drift_data(
    self, df: pd.DataFrame
) -> mm_schemas.ModelEndpointDriftValues:
    """
    Convert DataFrame with drift data to ModelEndpointDriftValues format.

    Expected DataFrame columns:
    - bucket_start: timestamp of the interval bucket
    - max_status: the maximum drift status in that bucket (1=suspected, 2=detected)
    - status_count: count of endpoints with that status in the bucket

    :param df: DataFrame with aggregated drift data
    :return: ModelEndpointDriftValues with time-binned counts
    """
    if df.empty:
        return mm_schemas.ModelEndpointDriftValues(values=[])

    suspected_val = mm_schemas.ResultStatusApp.potential_detection.value  # 1
    detected_val = mm_schemas.ResultStatusApp.detected.value  # 2
    status_col = f"max({mm_schemas.ResultData.RESULT_STATUS})"

    # Normalize the raw SQL column names to the expected format.
    renamed = df.rename(
        columns={
            "bucket_start": "_wstart",
            "max_status": status_col,
            "status_count": "count",
        }
    )

    # Sum counts per (interval, status), then pivot the statuses into
    # separate columns, making sure both columns always exist.
    pivoted = (
        renamed.groupby(["_wstart", status_col])["count"]
        .sum()
        .unstack()
        .reindex(columns=[suspected_val, detected_val], fill_value=0)
        .fillna(0)
        .astype(int)
        .rename(
            columns={
                suspected_val: "count_suspected",
                detected_val: "count_detected",
            }
        )
    )

    # (timestamp, count_suspected, count_detected) triples, one per bucket.
    values = list(
        zip(
            pivoted.index,
            pivoted["count_suspected"],
            pivoted["count_detected"],
        )
    )

    return mm_schemas.ModelEndpointDriftValues(values=values)
|
|
523
|
+
|
|
524
|
+
def read_results_data_impl(
    self,
    *,
    endpoint_id: Optional[str] = None,
    start: datetime,
    end: datetime,
    metrics: Optional[list[mm_schemas.ModelEndpointMonitoringMetric]] = None,
    with_result_extra_data: bool = False,
    timestamp_column: Optional[str] = None,
) -> pd.DataFrame:
    """Read results data from TimescaleDB (app_results table only) - returns DataFrame.

    :param endpoint_id:            Endpoint ID to filter by, or None to get all endpoints
    :param start:                  Start time
    :param end:                    End time
    :param metrics:                List of metrics to filter by, or None to get all results
    :param with_result_extra_data: Whether to include extra data column
    :param timestamp_column:       Optional timestamp column to use for time filtering
    :return: DataFrame with results data
    """
    schema = self.tables[mm_schemas.TimescaleDBTables.APP_RESULTS]
    name_column = mm_schemas.ResultData.RESULT_NAME
    value_column = mm_schemas.ResultData.RESULT_VALUE

    selected_columns = [
        schema.time_column,
        mm_schemas.WriterEvent.START_INFER_TIME,
        mm_schemas.WriterEvent.ENDPOINT_ID,
        mm_schemas.WriterEvent.APPLICATION_NAME,
        name_column,
        value_column,
        mm_schemas.ResultData.RESULT_STATUS,
        mm_schemas.ResultData.RESULT_KIND,
    ]
    if with_result_extra_data:
        selected_columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)

    # Endpoint filter first, then the metrics filter, combined into one clause.
    combined_filter = TimescaleDBQueryBuilder.combine_filters(
        [
            TimescaleDBQueryBuilder.build_endpoint_filter(endpoint_id),
            TimescaleDBQueryBuilder.build_results_filter(metrics),
        ]
    )

    # Shared utility handles pre-aggregate optimization with raw fallback.
    df = TimescaleDBQueryBuilder.build_read_data_with_fallback(
        connection=self._connection,
        pre_aggregate_manager=self._pre_aggregate_manager,
        table_schema=schema,
        start=start,
        end=end,
        columns=selected_columns,
        filter_query=combined_filter,
        name_column=name_column,
        value_column=value_column,
        debug_name="read_results_data",
        timestamp_column=timestamp_column,
    )

    if not df.empty:
        # Index the frame by its (datetime-typed) time column.
        df[schema.time_column] = pd.to_datetime(df[schema.time_column])
        df.set_index(schema.time_column, inplace=True)

    if not with_result_extra_data:
        # Callers always see the extra-data column, even when not requested.
        df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""

    return df
|