mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release has been flagged as potentially problematic by the registry; consult the registry advisory for details.

Files changed (150):
  1. mlrun/__init__.py +3 -2
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/plots.py +1 -1
  5. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  6. mlrun/auth/nuclio.py +89 -0
  7. mlrun/auth/providers.py +429 -0
  8. mlrun/auth/utils.py +415 -0
  9. mlrun/common/constants.py +7 -0
  10. mlrun/common/model_monitoring/helpers.py +41 -4
  11. mlrun/common/runtimes/constants.py +28 -0
  12. mlrun/common/schemas/__init__.py +13 -3
  13. mlrun/common/schemas/alert.py +2 -2
  14. mlrun/common/schemas/api_gateway.py +3 -0
  15. mlrun/common/schemas/auth.py +10 -10
  16. mlrun/common/schemas/client_spec.py +4 -0
  17. mlrun/common/schemas/constants.py +25 -0
  18. mlrun/common/schemas/frontend_spec.py +1 -8
  19. mlrun/common/schemas/function.py +24 -0
  20. mlrun/common/schemas/hub.py +3 -2
  21. mlrun/common/schemas/model_monitoring/__init__.py +1 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +2 -2
  23. mlrun/common/schemas/secret.py +17 -2
  24. mlrun/common/secrets.py +95 -1
  25. mlrun/common/types.py +10 -10
  26. mlrun/config.py +53 -15
  27. mlrun/data_types/infer.py +2 -2
  28. mlrun/datastore/__init__.py +2 -3
  29. mlrun/datastore/base.py +274 -10
  30. mlrun/datastore/datastore.py +1 -1
  31. mlrun/datastore/datastore_profile.py +49 -17
  32. mlrun/datastore/model_provider/huggingface_provider.py +6 -2
  33. mlrun/datastore/model_provider/model_provider.py +2 -2
  34. mlrun/datastore/model_provider/openai_provider.py +2 -2
  35. mlrun/datastore/s3.py +15 -16
  36. mlrun/datastore/sources.py +1 -1
  37. mlrun/datastore/store_resources.py +4 -4
  38. mlrun/datastore/storeytargets.py +16 -10
  39. mlrun/datastore/targets.py +1 -1
  40. mlrun/datastore/utils.py +16 -3
  41. mlrun/datastore/v3io.py +1 -1
  42. mlrun/db/base.py +36 -12
  43. mlrun/db/httpdb.py +316 -101
  44. mlrun/db/nopdb.py +29 -11
  45. mlrun/errors.py +4 -2
  46. mlrun/execution.py +11 -12
  47. mlrun/feature_store/api.py +1 -1
  48. mlrun/feature_store/common.py +1 -1
  49. mlrun/feature_store/feature_vector_utils.py +1 -1
  50. mlrun/feature_store/steps.py +8 -6
  51. mlrun/frameworks/_common/utils.py +3 -3
  52. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  53. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  54. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  55. mlrun/frameworks/_ml_common/utils.py +2 -1
  56. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  57. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  58. mlrun/frameworks/onnx/dataset.py +2 -1
  59. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  60. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  61. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  62. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  63. mlrun/frameworks/pytorch/utils.py +2 -1
  64. mlrun/frameworks/sklearn/metric.py +2 -1
  65. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  66. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  67. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  68. mlrun/hub/__init__.py +37 -0
  69. mlrun/hub/base.py +142 -0
  70. mlrun/hub/module.py +67 -76
  71. mlrun/hub/step.py +113 -0
  72. mlrun/launcher/base.py +2 -1
  73. mlrun/launcher/local.py +2 -1
  74. mlrun/model.py +12 -2
  75. mlrun/model_monitoring/__init__.py +0 -1
  76. mlrun/model_monitoring/api.py +2 -2
  77. mlrun/model_monitoring/applications/base.py +20 -6
  78. mlrun/model_monitoring/applications/context.py +1 -0
  79. mlrun/model_monitoring/controller.py +7 -17
  80. mlrun/model_monitoring/db/_schedules.py +2 -16
  81. mlrun/model_monitoring/db/_stats.py +2 -13
  82. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  83. mlrun/model_monitoring/db/tsdb/base.py +2 -4
  84. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  85. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  86. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  87. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  88. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  89. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  90. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  91. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  92. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  93. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  94. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  95. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  98. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
  99. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
  100. mlrun/model_monitoring/features_drift_table.py +2 -1
  101. mlrun/model_monitoring/helpers.py +2 -1
  102. mlrun/model_monitoring/stream_processing.py +18 -16
  103. mlrun/model_monitoring/writer.py +4 -3
  104. mlrun/package/__init__.py +2 -1
  105. mlrun/platforms/__init__.py +0 -44
  106. mlrun/platforms/iguazio.py +1 -1
  107. mlrun/projects/operations.py +11 -10
  108. mlrun/projects/project.py +81 -82
  109. mlrun/run.py +4 -7
  110. mlrun/runtimes/__init__.py +2 -204
  111. mlrun/runtimes/base.py +89 -21
  112. mlrun/runtimes/constants.py +225 -0
  113. mlrun/runtimes/daskjob.py +4 -2
  114. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  115. mlrun/runtimes/mounts.py +5 -0
  116. mlrun/runtimes/nuclio/__init__.py +12 -8
  117. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  118. mlrun/runtimes/nuclio/application/application.py +200 -32
  119. mlrun/runtimes/nuclio/function.py +154 -49
  120. mlrun/runtimes/nuclio/serving.py +55 -42
  121. mlrun/runtimes/pod.py +59 -10
  122. mlrun/secrets.py +46 -2
  123. mlrun/serving/__init__.py +2 -0
  124. mlrun/serving/remote.py +5 -5
  125. mlrun/serving/routers.py +3 -3
  126. mlrun/serving/server.py +46 -43
  127. mlrun/serving/serving_wrapper.py +6 -2
  128. mlrun/serving/states.py +554 -207
  129. mlrun/serving/steps.py +1 -1
  130. mlrun/serving/system_steps.py +42 -33
  131. mlrun/track/trackers/mlflow_tracker.py +29 -31
  132. mlrun/utils/helpers.py +89 -16
  133. mlrun/utils/http.py +9 -2
  134. mlrun/utils/notifications/notification/git.py +1 -1
  135. mlrun/utils/notifications/notification/mail.py +39 -16
  136. mlrun/utils/notifications/notification_pusher.py +2 -2
  137. mlrun/utils/version/version.json +2 -2
  138. mlrun/utils/version/version.py +3 -4
  139. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
  140. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
  141. mlrun/db/auth_utils.py +0 -152
  142. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
  143. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  144. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
  146. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
  147. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  148. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  149. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  150. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,541 @@
1
+ # Copyright 2025 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import datetime
16
+ from typing import Optional
17
+
18
+ import pandas as pd
19
+
20
+ import mlrun
21
+ import mlrun.common.model_monitoring.helpers
22
+ import mlrun.common.schemas.model_monitoring as mm_schemas
23
+ import mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_schema as timescaledb_schema
24
+ from mlrun.config import config
25
+ from mlrun.datastore.datastore_profile import DatastoreProfilePostgreSQL
26
+ from mlrun.model_monitoring.db import TSDBConnector
27
+ from mlrun.model_monitoring.db.tsdb.preaggregate import (
28
+ PreAggregateConfig,
29
+ PreAggregateManager,
30
+ )
31
+ from mlrun.model_monitoring.db.tsdb.timescaledb.queries.timescaledb_metrics_queries import (
32
+ TimescaleDBMetricsQueries,
33
+ )
34
+ from mlrun.model_monitoring.db.tsdb.timescaledb.queries.timescaledb_predictions_queries import (
35
+ TimescaleDBPredictionsQueries,
36
+ )
37
+ from mlrun.model_monitoring.db.tsdb.timescaledb.queries.timescaledb_results_queries import (
38
+ TimescaleDBResultsQueries,
39
+ )
40
+ from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_connection import (
41
+ Statement,
42
+ TimescaleDBConnection,
43
+ )
44
+ from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_operations import (
45
+ TimescaleDBOperationsManager,
46
+ )
47
+ from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_stream import (
48
+ TimescaleDBStreamProcessor,
49
+ )
50
+ from mlrun.utils import logger
51
+
52
+
53
+ class TimescaleDBConnector(TSDBConnector):
54
+ """
55
+ Complete TimescaleDB TSDB connector using composition pattern.
56
+
57
+ Uses composition for all specialized functionality:
58
+ - TimescaleDBMetricsQueries, TimescaleDBPredictionsQueries, TimescaleDBResultsQueries: Direct query operations
59
+ - TimescaleDBOperationsManager: Table management and write operations
60
+ - TimescaleDBStreamProcessor: Stream processing operations
61
+
62
+ Database naming (controlled by mlrun.mlconf.model_endpoint_monitoring.tsdb.auto_create_database):
63
+ - When auto_create_database=True (default): generates database name using system_id: 'mlrun_mm_{system_id}'
64
+ - When auto_create_database=False: uses the database from the profile/connection string as-is
65
+ """
66
+
67
+ type: str = mm_schemas.TSDBTarget.TimescaleDB
68
+
69
+ def __init__(
70
+ self,
71
+ project: str,
72
+ profile: DatastoreProfilePostgreSQL,
73
+ pre_aggregate_config: Optional[PreAggregateConfig] = None,
74
+ **kwargs,
75
+ ):
76
+ super().__init__(project=project)
77
+
78
+ self.profile = profile
79
+
80
+ # Determine the monitoring database name
81
+ self.database = self._determine_database_name(profile)
82
+
83
+ # Update profile to use the determined database name
84
+ # This ensures the connection uses the correct database
85
+ if profile.database != self.database:
86
+ logger.info(
87
+ "Auto-generated database name for TimescaleDB",
88
+ original_database=profile.database,
89
+ database=self.database,
90
+ )
91
+ # Create a new profile with the generated database
92
+ profile = DatastoreProfilePostgreSQL(
93
+ name=profile.name,
94
+ user=profile.user,
95
+ password=profile.password,
96
+ host=profile.host,
97
+ port=profile.port,
98
+ database=self.database,
99
+ )
100
+ self.profile = profile
101
+
102
+ # Create shared connection
103
+ self._connection = TimescaleDBConnection(
104
+ dsn=profile.dsn(),
105
+ min_connections=kwargs.get("min_connections", 1),
106
+ max_connections=kwargs.get("max_connections", 10),
107
+ max_retries=kwargs.get("max_retries", 3),
108
+ retry_delay=kwargs.get("retry_delay", 1.0),
109
+ autocommit=kwargs.get("autocommit", False),
110
+ )
111
+
112
+ # Create shared components needed by query classes
113
+ self._tables = timescaledb_schema.create_table_schemas(project)
114
+ self._pre_aggregate_manager = PreAggregateManager(pre_aggregate_config)
115
+
116
+ # Create specialized query handlers with proper initialization
117
+ self._metrics_queries = TimescaleDBMetricsQueries(
118
+ project=project,
119
+ connection=self._connection,
120
+ pre_aggregate_manager=self._pre_aggregate_manager,
121
+ tables=self._tables,
122
+ )
123
+ self._predictions_queries = TimescaleDBPredictionsQueries(
124
+ project=project,
125
+ connection=self._connection,
126
+ pre_aggregate_manager=self._pre_aggregate_manager,
127
+ tables=self._tables,
128
+ )
129
+ self._results_queries = TimescaleDBResultsQueries(
130
+ connection=self._connection,
131
+ project=project,
132
+ pre_aggregate_manager=self._pre_aggregate_manager,
133
+ tables=self._tables,
134
+ )
135
+
136
+ # Create operations and stream handlers
137
+ self._operations = TimescaleDBOperationsManager(
138
+ project=project,
139
+ connection=self._connection,
140
+ pre_aggregate_config=pre_aggregate_config,
141
+ profile=profile,
142
+ )
143
+
144
+ self._stream = TimescaleDBStreamProcessor(
145
+ project=project, profile=profile, connection=self._connection
146
+ )
147
+
148
+ self._pre_aggregate_config = pre_aggregate_config
149
+
150
+ def _determine_database_name(self, profile: DatastoreProfilePostgreSQL) -> str:
151
+ """
152
+ Determine the database name to use.
153
+
154
+ Delegates to the shared helper function to ensure consistent database naming
155
+ across all TimescaleDB components (connector, stream, storey targets).
156
+
157
+ :param profile: The PostgreSQL profile
158
+ :return: The database name to use
159
+ """
160
+ return mlrun.common.model_monitoring.helpers.get_tsdb_database_name(
161
+ profile.database
162
+ )
163
+
164
+ # Delegate operations methods
165
+ def create_tables(self, *args, **kwargs) -> None:
166
+ return self._operations.create_tables(*args, **kwargs)
167
+
168
+ def write_application_event(self, *args, **kwargs) -> None:
169
+ return self._operations.write_application_event(*args, **kwargs)
170
+
171
+ def delete_tsdb_records(self, *args, **kwargs) -> None:
172
+ return self._operations.delete_tsdb_records(*args, **kwargs)
173
+
174
+ def delete_tsdb_resources(self, *args, **kwargs) -> None:
175
+ return self._operations.delete_tsdb_resources(*args, **kwargs)
176
+
177
+ def delete_application_records(self, *args, **kwargs) -> None:
178
+ return self._operations.delete_application_records(*args, **kwargs)
179
+
180
+ def read_metrics_data(
181
+ self,
182
+ *,
183
+ endpoint_id: str,
184
+ start: datetime.datetime,
185
+ end: datetime.datetime,
186
+ metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
187
+ type: str,
188
+ with_result_extra_data: bool = False,
189
+ ):
190
+ """Read metrics or results data from TimescaleDB (cross-cutting coordination)."""
191
+
192
+ if type == "metrics":
193
+ df = self._metrics_queries.read_metrics_data_impl(
194
+ endpoint_id=endpoint_id,
195
+ start=start,
196
+ end=end,
197
+ metrics=metrics,
198
+ )
199
+ # Use inherited method to convert DataFrame to domain objects
200
+ return self.df_to_metrics_values(
201
+ df=df, metrics=metrics, project=self.project
202
+ )
203
+
204
+ else: # results
205
+ df = self._results_queries.read_results_data_impl(
206
+ endpoint_id=endpoint_id,
207
+ start=start,
208
+ end=end,
209
+ metrics=metrics,
210
+ with_result_extra_data=with_result_extra_data,
211
+ )
212
+ # Use inherited method to convert DataFrame to domain objects
213
+ return self.df_to_results_values(
214
+ df=df, metrics=metrics, project=self.project
215
+ )
216
+
217
+ def get_model_endpoint_real_time_metrics(self, *args, **kwargs):
218
+ return self._metrics_queries.get_model_endpoint_real_time_metrics(
219
+ *args, **kwargs
220
+ )
221
+
222
+ def get_metrics_metadata(self, *args, **kwargs):
223
+ return self._metrics_queries.get_metrics_metadata(*args, **kwargs)
224
+
225
+ def add_basic_metrics(
226
+ self,
227
+ model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
228
+ metric_list: Optional[list[str]] = None,
229
+ ) -> list[mlrun.common.schemas.ModelEndpoint]:
230
+ """
231
+ Add basic metrics to the model endpoint object using TimescaleDB optimizations.
232
+
233
+ :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
234
+ be filled with the relevant basic metrics.
235
+ :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
236
+
237
+ :return: A list of `ModelEndpointMonitoringMetric` objects.
238
+ """
239
+ uids = [mep.metadata.uid for mep in model_endpoint_objects]
240
+
241
+ # Access methods directly from the respective query classes
242
+ # Note: last_request is handled separately due to potential data synchronization issues
243
+ metric_name_to_function = {
244
+ mm_schemas.EventFieldType.ERROR_COUNT: self._results_queries.get_error_count,
245
+ mm_schemas.ModelEndpointSchema.AVG_LATENCY: self._predictions_queries.get_avg_latency,
246
+ mm_schemas.ResultData.RESULT_STATUS: self._results_queries.get_drift_status,
247
+ }
248
+
249
+ if metric_list is not None:
250
+ for metric_name in list(metric_name_to_function):
251
+ if metric_name not in metric_list:
252
+ del metric_name_to_function[metric_name]
253
+
254
+ metric_name_to_df = {
255
+ metric_name: function(endpoint_ids=uids)
256
+ for metric_name, function in metric_name_to_function.items()
257
+ }
258
+
259
+ def add_metrics(
260
+ mep: mlrun.common.schemas.ModelEndpoint,
261
+ df_dictionary: dict[str, pd.DataFrame],
262
+ ):
263
+ for metric in df_dictionary:
264
+ df = df_dictionary.get(metric, pd.DataFrame())
265
+ if not df.empty:
266
+ line = df[
267
+ df[mm_schemas.WriterEvent.ENDPOINT_ID] == mep.metadata.uid
268
+ ]
269
+ if not line.empty and metric in line:
270
+ value = line[metric].item()
271
+ if isinstance(value, pd.Timestamp):
272
+ value = value.to_pydatetime()
273
+ setattr(mep.status, metric, value)
274
+
275
+ return mep
276
+
277
+ enriched_endpoints = list(
278
+ map(
279
+ lambda mep: add_metrics(
280
+ mep=mep,
281
+ df_dictionary=metric_name_to_df,
282
+ ),
283
+ model_endpoint_objects,
284
+ )
285
+ )
286
+
287
+ # Handle last_request separately with special enrichment
288
+ if metric_list is None or "last_request" in metric_list:
289
+ self._enrich_mep_with_last_request(
290
+ model_endpoint_objects={
291
+ mep.metadata.uid: mep for mep in enriched_endpoints
292
+ }
293
+ )
294
+
295
+ return enriched_endpoints
296
+
297
+ def _enrich_mep_with_last_request(
298
+ self,
299
+ model_endpoint_objects: dict[str, mlrun.common.schemas.ModelEndpoint],
300
+ ):
301
+ """
302
+ Enrich model endpoint objects with last_request data from predictions table.
303
+ This method handles the special case of last_request which may have timing issues.
304
+ """
305
+ try:
306
+ last_request_df = self._predictions_queries.get_last_request(
307
+ endpoint_ids=list(model_endpoint_objects.keys())
308
+ )
309
+
310
+ if not last_request_df.empty:
311
+ for _, row in last_request_df.iterrows():
312
+ endpoint_id = row.get(mm_schemas.WriterEvent.ENDPOINT_ID)
313
+ last_request = row.get("last_request")
314
+
315
+ if (
316
+ endpoint_id in model_endpoint_objects
317
+ and last_request is not None
318
+ ):
319
+ if isinstance(last_request, pd.Timestamp):
320
+ last_request = last_request.to_pydatetime()
321
+ model_endpoint_objects[
322
+ endpoint_id
323
+ ].status.last_request = last_request
324
+ except Exception as e:
325
+ # Log but don't fail - last_request is not critical for basic functionality
326
+ logger.warning(
327
+ "Failed to enrich model endpoints with last_request data",
328
+ error=mlrun.errors.err_to_str(e),
329
+ endpoint_count=len(model_endpoint_objects),
330
+ )
331
+
332
+ def read_predictions(self, *args, **kwargs):
333
+ return self._predictions_queries.read_predictions(*args, **kwargs)
334
+
335
+ def _get_records(
336
+ self,
337
+ table: str,
338
+ start: datetime.datetime,
339
+ end: datetime.datetime,
340
+ endpoint_id: Optional[str] = None,
341
+ columns: Optional[list[str]] = None,
342
+ timestamp_column: Optional[str] = None,
343
+ ) -> pd.DataFrame:
344
+ """
345
+ Get raw records from TimescaleDB as pandas DataFrame.
346
+
347
+ This method provides direct access to raw table data.
348
+
349
+ :param table: Table name - use TimescaleDBTables enum (METRICS, APP_RESULTS, or PREDICTIONS)
350
+ :param start: Start time for the query
351
+ :param end: End time for the query
352
+ :param endpoint_id: Optional endpoint ID filter (None = all endpoints)
353
+ :param columns: Optional list of specific columns to return (None = all columns)
354
+ :param timestamp_column: Optional timestamp column to use for time filtering (None = use table's default)
355
+ :return: Raw pandas DataFrame with all matching records
356
+ """
357
+ if table == mm_schemas.TimescaleDBTables.METRICS:
358
+ df = self._metrics_queries.read_metrics_data_impl(
359
+ endpoint_id=endpoint_id,
360
+ start=start,
361
+ end=end,
362
+ metrics=None, # Get all metrics
363
+ timestamp_column=timestamp_column,
364
+ )
365
+ elif table == mm_schemas.TimescaleDBTables.APP_RESULTS:
366
+ df = self._results_queries.read_results_data_impl(
367
+ endpoint_id=endpoint_id,
368
+ start=start,
369
+ end=end,
370
+ metrics=None, # Get all results
371
+ with_result_extra_data=True,
372
+ timestamp_column=timestamp_column,
373
+ )
374
+ elif table == mm_schemas.TimescaleDBTables.PREDICTIONS:
375
+ df = self._predictions_queries.read_predictions_impl(
376
+ endpoint_id=endpoint_id,
377
+ start=start,
378
+ end=end,
379
+ columns=columns,
380
+ timestamp_column=timestamp_column,
381
+ )
382
+ else:
383
+ raise mlrun.errors.MLRunInvalidArgumentError(
384
+ f"Invalid table '{table}'. Must be METRICS, APP_RESULTS, or PREDICTIONS from TimescaleDBTables enum"
385
+ )
386
+
387
+ if columns is not None and not df.empty:
388
+ # Filter to requested columns if specified
389
+ available_columns = [col for col in columns if col in df.columns]
390
+ df = df[available_columns]
391
+
392
+ return df
393
+
394
+ def get_last_request(self, *args, **kwargs):
395
+ return self._predictions_queries.get_last_request(*args, **kwargs)
396
+
397
+ def get_avg_latency(self, *args, **kwargs):
398
+ return self._predictions_queries.get_avg_latency(*args, **kwargs)
399
+
400
+ def count_processed_model_endpoints(
401
+ self,
402
+ start: Optional[datetime.datetime] = None,
403
+ end: Optional[datetime.datetime] = None,
404
+ application_names: Optional[list[str] | str] = None,
405
+ ) -> dict[str, int]:
406
+ """
407
+ Count unique endpoints per application from METRICS and APP_RESULTS tables.
408
+
409
+ Uses SQL UNION to efficiently count endpoints that have data in EITHER table.
410
+
411
+ :param start: Start time for the query (default: last 24 hours)
412
+ :param end: End time for the query (default: current time)
413
+ :param application_names: Filter by specific application names
414
+ :return: Dictionary mapping application_name to endpoint count
415
+ """
416
+ # Set default time range
417
+ start = start or (mlrun.utils.datetime_now() - datetime.timedelta(hours=24))
418
+ start, end = self._pre_aggregate_manager.get_start_end(start, end)
419
+
420
+ metrics_table = self._tables[mm_schemas.TimescaleDBTables.METRICS]
421
+ app_results_table = self._tables[mm_schemas.TimescaleDBTables.APP_RESULTS]
422
+ time_column = mm_schemas.WriterEvent.END_INFER_TIME
423
+ app_column = mm_schemas.WriterEvent.APPLICATION_NAME
424
+ endpoint_column = mm_schemas.WriterEvent.ENDPOINT_ID
425
+
426
+ # Build application filter and params
427
+ app_filter_metrics = ""
428
+ app_filter_results = ""
429
+
430
+ if application_names:
431
+ if isinstance(application_names, str):
432
+ application_names = [application_names]
433
+ app_names_list = list(application_names)
434
+ app_placeholders = ", ".join(["%s"] * len(app_names_list))
435
+ app_filter_metrics = f"AND {app_column} IN ({app_placeholders})"
436
+ app_filter_results = f"AND {app_column} IN ({app_placeholders})"
437
+ # Params: metrics (start, end, apps), app_results (start, end, apps)
438
+ params = [start, end] + app_names_list + [start, end] + app_names_list
439
+ else:
440
+ params = [start, end, start, end]
441
+
442
+ # Use UNION to combine endpoints from both METRICS and APP_RESULTS tables
443
+ query_sql = f"""
444
+ SELECT {app_column}, COUNT(DISTINCT {endpoint_column}) as endpoint_count
445
+ FROM (
446
+ SELECT DISTINCT {app_column}, {endpoint_column}
447
+ FROM {metrics_table.full_name()}
448
+ WHERE {time_column} >= %s AND {time_column} <= %s
449
+ {app_filter_metrics}
450
+
451
+ UNION
452
+
453
+ SELECT DISTINCT {app_column}, {endpoint_column}
454
+ FROM {app_results_table.full_name()}
455
+ WHERE {time_column} >= %s AND {time_column} <= %s
456
+ {app_filter_results}
457
+ ) combined
458
+ GROUP BY {app_column}
459
+ """
460
+
461
+ stmt = Statement(query_sql, params)
462
+ result = self._connection.run(query=stmt)
463
+
464
+ if not result or not result.data:
465
+ return {}
466
+
467
+ # Convert result to dict: {application_name: count}
468
+ return {row[0]: row[1] for row in result.data}
469
+
470
+ def get_drift_status(self, *args, **kwargs):
471
+ return self._results_queries.get_drift_status(*args, **kwargs)
472
+
473
+ def get_results_metadata(self, *args, **kwargs):
474
+ return self._results_queries.get_results_metadata(*args, **kwargs)
475
+
476
+ def get_error_count(self, *args, **kwargs):
477
+ return self._results_queries.get_error_count(*args, **kwargs)
478
+
479
+ def count_results_by_status(self, *args, **kwargs):
480
+ return self._results_queries.count_results_by_status(*args, **kwargs)
481
+
482
+ def apply_monitoring_stream_steps(self, *args, **kwargs) -> None:
483
+ return self._stream.apply_monitoring_stream_steps(*args, **kwargs)
484
+
485
+ def handle_model_error(self, *args, **kwargs) -> None:
486
+ return self._stream.handle_model_error(*args, **kwargs)
487
+
488
+ def calculate_latest_metrics(self, *args, **kwargs):
489
+ return self._metrics_queries.calculate_latest_metrics(*args, **kwargs)
490
+
491
+ def get_drift_data(self, *args, **kwargs):
492
+ return self._results_queries.get_drift_data(*args, **kwargs)
493
+
494
+ def add_pre_writer_steps(self, graph, after):
495
+ return graph.add_step(
496
+ "mlrun.model_monitoring.db.tsdb.timescaledb.writer_graph_steps.ProcessBeforeTimescaleDBWriter",
497
+ name="ProcessBeforeTimescaleDBWriter",
498
+ after=after,
499
+ )
500
+
501
+ def apply_writer_steps(self, graph, after, **kwargs) -> None:
502
+ tables = timescaledb_schema.create_table_schemas(self.project)
503
+
504
+ graph.add_step(
505
+ "mlrun.datastore.storeytargets.TimescaleDBStoreyTarget",
506
+ name="tsdb_metrics",
507
+ after=after,
508
+ url=f"ds://{self.profile.name}",
509
+ table=tables[mm_schemas.TimescaleDBTables.METRICS].full_name(),
510
+ time_col=mm_schemas.WriterEvent.END_INFER_TIME,
511
+ columns=[
512
+ mm_schemas.WriterEvent.START_INFER_TIME,
513
+ mm_schemas.MetricData.METRIC_VALUE,
514
+ mm_schemas.WriterEvent.ENDPOINT_ID,
515
+ mm_schemas.WriterEvent.APPLICATION_NAME,
516
+ mm_schemas.MetricData.METRIC_NAME,
517
+ ],
518
+ max_events=config.model_endpoint_monitoring.writer_graph.max_events,
519
+ flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
520
+ )
521
+
522
+ graph.add_step(
523
+ "mlrun.datastore.storeytargets.TimescaleDBStoreyTarget",
524
+ name="tsdb_app_results",
525
+ after=after,
526
+ url=f"ds://{self.profile.name}",
527
+ table=tables[mm_schemas.TimescaleDBTables.APP_RESULTS].full_name(),
528
+ time_col=mm_schemas.WriterEvent.END_INFER_TIME,
529
+ columns=[
530
+ mm_schemas.WriterEvent.START_INFER_TIME,
531
+ mm_schemas.ResultData.RESULT_VALUE,
532
+ mm_schemas.ResultData.RESULT_STATUS,
533
+ mm_schemas.ResultData.RESULT_EXTRA_DATA,
534
+ mm_schemas.WriterEvent.ENDPOINT_ID,
535
+ mm_schemas.WriterEvent.APPLICATION_NAME,
536
+ mm_schemas.ResultData.RESULT_NAME,
537
+ mm_schemas.ResultData.RESULT_KIND,
538
+ ],
539
+ max_events=config.model_endpoint_monitoring.writer_graph.max_events,
540
+ flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
541
+ )