mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (167)
  1. mlrun/__init__.py +24 -3
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/document.py +6 -1
  5. mlrun/artifacts/llm_prompt.py +21 -15
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/artifacts/plots.py +1 -1
  8. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  9. mlrun/auth/nuclio.py +89 -0
  10. mlrun/auth/providers.py +429 -0
  11. mlrun/auth/utils.py +415 -0
  12. mlrun/common/constants.py +14 -0
  13. mlrun/common/model_monitoring/helpers.py +123 -0
  14. mlrun/common/runtimes/constants.py +28 -0
  15. mlrun/common/schemas/__init__.py +14 -3
  16. mlrun/common/schemas/alert.py +2 -2
  17. mlrun/common/schemas/api_gateway.py +3 -0
  18. mlrun/common/schemas/auth.py +12 -10
  19. mlrun/common/schemas/client_spec.py +4 -0
  20. mlrun/common/schemas/constants.py +25 -0
  21. mlrun/common/schemas/frontend_spec.py +1 -8
  22. mlrun/common/schemas/function.py +34 -0
  23. mlrun/common/schemas/hub.py +33 -20
  24. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  25. mlrun/common/schemas/model_monitoring/constants.py +12 -15
  26. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  27. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  28. mlrun/common/schemas/pipeline.py +1 -1
  29. mlrun/common/schemas/secret.py +17 -2
  30. mlrun/common/secrets.py +95 -1
  31. mlrun/common/types.py +10 -10
  32. mlrun/config.py +69 -19
  33. mlrun/data_types/infer.py +2 -2
  34. mlrun/datastore/__init__.py +12 -5
  35. mlrun/datastore/azure_blob.py +162 -47
  36. mlrun/datastore/base.py +274 -10
  37. mlrun/datastore/datastore.py +7 -2
  38. mlrun/datastore/datastore_profile.py +84 -22
  39. mlrun/datastore/model_provider/huggingface_provider.py +225 -41
  40. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  41. mlrun/datastore/model_provider/model_provider.py +206 -74
  42. mlrun/datastore/model_provider/openai_provider.py +226 -66
  43. mlrun/datastore/s3.py +39 -18
  44. mlrun/datastore/sources.py +1 -1
  45. mlrun/datastore/store_resources.py +4 -4
  46. mlrun/datastore/storeytargets.py +17 -12
  47. mlrun/datastore/targets.py +1 -1
  48. mlrun/datastore/utils.py +25 -6
  49. mlrun/datastore/v3io.py +1 -1
  50. mlrun/db/base.py +63 -32
  51. mlrun/db/httpdb.py +373 -153
  52. mlrun/db/nopdb.py +54 -21
  53. mlrun/errors.py +4 -2
  54. mlrun/execution.py +66 -25
  55. mlrun/feature_store/api.py +1 -1
  56. mlrun/feature_store/common.py +1 -1
  57. mlrun/feature_store/feature_vector_utils.py +1 -1
  58. mlrun/feature_store/steps.py +8 -6
  59. mlrun/frameworks/_common/utils.py +3 -3
  60. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  61. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  62. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  63. mlrun/frameworks/_ml_common/utils.py +2 -1
  64. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  65. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  66. mlrun/frameworks/onnx/dataset.py +2 -1
  67. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  68. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  69. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  70. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  71. mlrun/frameworks/pytorch/utils.py +2 -1
  72. mlrun/frameworks/sklearn/metric.py +2 -1
  73. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  74. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  75. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  76. mlrun/hub/__init__.py +52 -0
  77. mlrun/hub/base.py +142 -0
  78. mlrun/hub/module.py +172 -0
  79. mlrun/hub/step.py +113 -0
  80. mlrun/k8s_utils.py +105 -16
  81. mlrun/launcher/base.py +15 -7
  82. mlrun/launcher/local.py +4 -1
  83. mlrun/model.py +14 -4
  84. mlrun/model_monitoring/__init__.py +0 -1
  85. mlrun/model_monitoring/api.py +65 -28
  86. mlrun/model_monitoring/applications/__init__.py +1 -1
  87. mlrun/model_monitoring/applications/base.py +299 -128
  88. mlrun/model_monitoring/applications/context.py +2 -4
  89. mlrun/model_monitoring/controller.py +132 -58
  90. mlrun/model_monitoring/db/_schedules.py +38 -29
  91. mlrun/model_monitoring/db/_stats.py +6 -16
  92. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  93. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  94. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  95. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  98. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  99. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  100. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  101. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  102. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  103. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  104. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  105. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  106. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  107. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  108. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
  109. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
  110. mlrun/model_monitoring/features_drift_table.py +2 -1
  111. mlrun/model_monitoring/helpers.py +30 -6
  112. mlrun/model_monitoring/stream_processing.py +34 -28
  113. mlrun/model_monitoring/writer.py +224 -4
  114. mlrun/package/__init__.py +2 -1
  115. mlrun/platforms/__init__.py +0 -43
  116. mlrun/platforms/iguazio.py +8 -4
  117. mlrun/projects/operations.py +17 -11
  118. mlrun/projects/pipelines.py +2 -2
  119. mlrun/projects/project.py +187 -123
  120. mlrun/run.py +95 -21
  121. mlrun/runtimes/__init__.py +2 -186
  122. mlrun/runtimes/base.py +103 -25
  123. mlrun/runtimes/constants.py +225 -0
  124. mlrun/runtimes/daskjob.py +5 -2
  125. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  126. mlrun/runtimes/local.py +5 -2
  127. mlrun/runtimes/mounts.py +20 -2
  128. mlrun/runtimes/nuclio/__init__.py +12 -7
  129. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  130. mlrun/runtimes/nuclio/application/application.py +339 -40
  131. mlrun/runtimes/nuclio/function.py +222 -72
  132. mlrun/runtimes/nuclio/serving.py +132 -42
  133. mlrun/runtimes/pod.py +213 -21
  134. mlrun/runtimes/utils.py +49 -9
  135. mlrun/secrets.py +99 -14
  136. mlrun/serving/__init__.py +2 -0
  137. mlrun/serving/remote.py +84 -11
  138. mlrun/serving/routers.py +26 -44
  139. mlrun/serving/server.py +138 -51
  140. mlrun/serving/serving_wrapper.py +6 -2
  141. mlrun/serving/states.py +997 -283
  142. mlrun/serving/steps.py +62 -0
  143. mlrun/serving/system_steps.py +149 -95
  144. mlrun/serving/v2_serving.py +9 -10
  145. mlrun/track/trackers/mlflow_tracker.py +29 -31
  146. mlrun/utils/helpers.py +292 -94
  147. mlrun/utils/http.py +9 -2
  148. mlrun/utils/notifications/notification/base.py +18 -0
  149. mlrun/utils/notifications/notification/git.py +3 -5
  150. mlrun/utils/notifications/notification/mail.py +39 -16
  151. mlrun/utils/notifications/notification/slack.py +2 -4
  152. mlrun/utils/notifications/notification/webhook.py +2 -5
  153. mlrun/utils/notifications/notification_pusher.py +3 -3
  154. mlrun/utils/version/version.json +2 -2
  155. mlrun/utils/version/version.py +3 -4
  156. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
  157. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
  158. mlrun/api/schemas/__init__.py +0 -259
  159. mlrun/db/auth_utils.py +0 -152
  160. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
  161. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  162. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  163. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
  164. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  165. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  166. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  167. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
@@ -1,1266 +0,0 @@
1
- # Copyright 2024 Iguazio
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import threading
16
- from datetime import datetime, timedelta
17
- from typing import Callable, Final, Literal, Optional, Union
18
-
19
- import pandas as pd
20
- import taosws
21
-
22
- import mlrun.common.schemas.model_monitoring as mm_schemas
23
- import mlrun.common.types
24
- import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
25
- import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
26
- from mlrun.datastore.datastore_profile import DatastoreProfile
27
- from mlrun.model_monitoring.db import TSDBConnector
28
- from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connection import (
29
- Statement,
30
- TDEngineConnection,
31
- )
32
- from mlrun.model_monitoring.helpers import get_invocations_fqn, get_start_end
33
- from mlrun.utils import logger
34
-
35
- # Thread-local storage for connections
36
- _thread_local = threading.local()
37
-
38
-
39
- class TDEngineTimestampPrecision(mlrun.common.types.StrEnum):
40
- """
41
- The timestamp precision for the TDEngine database.
42
- For more information, see:
43
- https://docs.tdengine.com/tdengine-reference/sql-manual/data-types/#timestamp
44
- https://docs.tdengine.com/tdengine-reference/sql-manual/manage-databases/#create-database
45
- """
46
-
47
- MILLISECOND = "ms" # TDEngine's default
48
- MICROSECOND = "us" # MLRun's default
49
- NANOSECOND = "ns"
50
-
51
-
52
- class TDEngineConnector(TSDBConnector):
53
- """
54
- Handles the TSDB operations when the TSDB connector is of type TDEngine.
55
- """
56
-
57
- type: str = mm_schemas.TSDBTarget.TDEngine
58
- database = f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
59
-
60
- def __init__(
61
- self,
62
- project: str,
63
- profile: DatastoreProfile,
64
- timestamp_precision: TDEngineTimestampPrecision = TDEngineTimestampPrecision.MICROSECOND,
65
- **kwargs,
66
- ):
67
- super().__init__(project=project)
68
-
69
- self._tdengine_connection_profile = profile
70
-
71
- self._timestamp_precision: Final = ( # cannot be changed after initialization
72
- timestamp_precision
73
- )
74
-
75
- self._init_super_tables()
76
-
77
- @property
78
- def connection(self) -> TDEngineConnection:
79
- if not hasattr(_thread_local, "connection"):
80
- _thread_local.connection = self._create_connection()
81
- logger.debug(
82
- "Created new TDEngine connection for thread",
83
- project=self.project,
84
- thread_name=threading.current_thread().name,
85
- thread_id=threading.get_ident(),
86
- )
87
- return _thread_local.connection
88
-
89
- def _create_connection(self) -> TDEngineConnection:
90
- """Establish a connection to the TSDB server."""
91
- logger.debug("Creating a new connection to TDEngine", project=self.project)
92
- conn = TDEngineConnection(
93
- self._tdengine_connection_profile.dsn(),
94
- )
95
- conn.prefix_statements = [f"USE {self.database}"]
96
-
97
- return conn
98
-
99
- def _init_super_tables(self):
100
- """Initialize the super tables for the TSDB."""
101
- self.tables = {
102
- mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
103
- project=self.project, database=self.database
104
- ),
105
- mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
106
- project=self.project, database=self.database
107
- ),
108
- mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
109
- project=self.project, database=self.database
110
- ),
111
- mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
112
- project=self.project, database=self.database
113
- ),
114
- }
115
-
116
- def _create_db_if_not_exists(self):
117
- """Create the database if it does not exist."""
118
- self.connection.prefix_statements = []
119
- self.connection.run(
120
- statements=f"CREATE DATABASE IF NOT EXISTS {self.database} PRECISION '{self._timestamp_precision}'",
121
- )
122
- self.connection.prefix_statements = [f"USE {self.database}"]
123
- logger.debug(
124
- "The TDEngine database is currently in use",
125
- project=self.project,
126
- database=self.database,
127
- )
128
-
129
- def create_tables(self):
130
- """Create TDEngine supertables."""
131
-
132
- # Create the database if it does not exist
133
- self._create_db_if_not_exists()
134
-
135
- for table in self.tables:
136
- create_table_query = self.tables[table]._create_super_table_query()
137
- conn = self.connection
138
- conn.run(
139
- statements=create_table_query,
140
- )
141
-
142
- def write_application_event(
143
- self,
144
- event: dict,
145
- kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
146
- ) -> None:
147
- """
148
- Write a single result or metric to TSDB.
149
- """
150
-
151
- table_name = (
152
- f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
153
- f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
154
- )
155
-
156
- if kind == mm_schemas.WriterEventKind.RESULT:
157
- # Write a new result
158
- table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS]
159
- table_name = (
160
- f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
161
- ).replace("-", "_")
162
-
163
- else:
164
- # Write a new metric
165
- table = self.tables[mm_schemas.TDEngineSuperTables.METRICS]
166
- table_name = (
167
- f"{table_name}_{event[mm_schemas.MetricData.METRIC_NAME]}"
168
- ).replace("-", "_")
169
-
170
- # Escape the table name for case-sensitivity (ML-7908)
171
- # https://github.com/taosdata/taos-connector-python/issues/260
172
- table_name = f"`{table_name}`"
173
-
174
- # Convert the datetime strings to datetime objects
175
- event[mm_schemas.WriterEvent.END_INFER_TIME] = self._convert_to_datetime(
176
- val=event[mm_schemas.WriterEvent.END_INFER_TIME]
177
- )
178
- event[mm_schemas.WriterEvent.START_INFER_TIME] = self._convert_to_datetime(
179
- val=event[mm_schemas.WriterEvent.START_INFER_TIME]
180
- )
181
-
182
- create_table_sql = table._create_subtable_sql(subtable=table_name, values=event)
183
-
184
- # we need the string values to be sent to the connection, not the enum
185
- columns = {str(key): str(val) for key, val in table.columns.items()}
186
-
187
- insert_statement = Statement(
188
- columns=columns,
189
- subtable=table_name,
190
- values=event,
191
- timestamp_precision=self._timestamp_precision,
192
- )
193
-
194
- self.connection.run(
195
- statements=[
196
- create_table_sql,
197
- insert_statement,
198
- ],
199
- )
200
-
201
- @staticmethod
202
- def _convert_to_datetime(val: Union[str, datetime]) -> datetime:
203
- return datetime.fromisoformat(val) if isinstance(val, str) else val
204
-
205
- @staticmethod
206
- def _generate_filter_query(
207
- filter_column: str, filter_values: Union[str, list[Union[str, int]]]
208
- ) -> Optional[str]:
209
- """
210
- Generate a filter query for TDEngine based on the provided column and values.
211
-
212
- :param filter_column: The column to filter by.
213
- :param filter_values: A single value or a list of values to filter by.
214
-
215
- :return: A string representing the filter query.
216
- :raise: MLRunInvalidArgumentError if the filter values are not of type string or list.
217
- """
218
-
219
- if isinstance(filter_values, str):
220
- return f"{filter_column}='{filter_values}'"
221
- elif isinstance(filter_values, list):
222
- return f"{filter_column} IN ({', '.join(repr(v) for v in filter_values)}) "
223
- else:
224
- raise mlrun.errors.MLRunInvalidArgumentError(
225
- f"Invalid filter values {filter_values}: must be a string or a list, "
226
- f"got {type(filter_values).__name__}; filter values: {filter_values}"
227
- )
228
-
229
- def _drop_database_query(self) -> str:
230
- return f"DROP DATABASE IF EXISTS {self.database};"
231
-
232
- def _get_table_name_query(self) -> str:
233
- return f"SELECT table_name FROM information_schema.ins_tables where db_name='{self.database}' LIMIT 1;"
234
-
235
- def apply_monitoring_stream_steps(self, graph, **kwarg):
236
- """
237
- Apply TSDB steps to the provided monitoring graph. Throughout these steps, the graph stores live data of
238
- different key metric dictionaries. This data is used by the monitoring dashboards in
239
- Grafana. At the moment, we store two types of data:
240
- - prediction latency.
241
- - custom metrics.
242
- """
243
-
244
- def apply_process_before_tsdb():
245
- graph.add_step(
246
- "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ProcessBeforeTDEngine",
247
- name="ProcessBeforeTDEngine",
248
- after="FilterNOP",
249
- )
250
-
251
- def apply_tdengine_target(name, after):
252
- graph.add_step(
253
- "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
254
- name=name,
255
- after=after,
256
- url=f"ds://{self._tdengine_connection_profile.name}",
257
- supertable=self.tables[
258
- mm_schemas.TDEngineSuperTables.PREDICTIONS
259
- ].super_table,
260
- table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
261
- time_col=mm_schemas.EventFieldType.TIME,
262
- database=self.database,
263
- columns=[
264
- mm_schemas.EventFieldType.LATENCY,
265
- mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
266
- mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT,
267
- mm_schemas.EventFieldType.EFFECTIVE_SAMPLE_COUNT,
268
- ],
269
- tag_cols=[
270
- mm_schemas.EventFieldType.ENDPOINT_ID,
271
- ],
272
- max_events=1000,
273
- flush_after_seconds=30,
274
- )
275
-
276
- apply_process_before_tsdb()
277
- apply_tdengine_target(
278
- name="TDEngineTarget",
279
- after="ProcessBeforeTDEngine",
280
- )
281
-
282
- def handle_model_error(
283
- self,
284
- graph,
285
- tsdb_batching_max_events: int = 1000,
286
- tsdb_batching_timeout_secs: int = 30,
287
- **kwargs,
288
- ) -> None:
289
- graph.add_step(
290
- "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
291
- name="error_extractor",
292
- after="ForwardError",
293
- )
294
- graph.add_step(
295
- "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
296
- name="tsdb_error",
297
- after="error_extractor",
298
- url=f"ds://{self._tdengine_connection_profile.name}",
299
- supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
300
- table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
301
- time_col=mm_schemas.EventFieldType.TIME,
302
- database=self.database,
303
- columns=[
304
- mm_schemas.EventFieldType.MODEL_ERROR,
305
- ],
306
- tag_cols=[
307
- mm_schemas.EventFieldType.ENDPOINT_ID,
308
- mm_schemas.EventFieldType.ERROR_TYPE,
309
- ],
310
- max_events=tsdb_batching_max_events,
311
- flush_after_seconds=tsdb_batching_timeout_secs,
312
- )
313
-
314
- def delete_tsdb_records(
315
- self,
316
- endpoint_ids: list[str],
317
- ):
318
- """
319
- To delete subtables within TDEngine, we first query the subtable names with the provided endpoint_ids.
320
- Then, we drop each subtable.
321
- """
322
- logger.debug(
323
- "Deleting model endpoint resources using the TDEngine connector",
324
- project=self.project,
325
- number_of_endpoints_to_delete=len(endpoint_ids),
326
- )
327
-
328
- # Get all subtables with the provided endpoint_ids
329
- subtables = []
330
- try:
331
- for table in self.tables:
332
- get_subtable_query = self.tables[table]._get_subtables_query_by_tag(
333
- filter_tag="endpoint_id", filter_values=endpoint_ids
334
- )
335
- subtables_result = self.connection.run(
336
- query=get_subtable_query,
337
- )
338
- subtables.extend([subtable[0] for subtable in subtables_result.data])
339
- except Exception as e:
340
- logger.warning(
341
- "Failed to get subtables for deletion. You may need to delete them manually. "
342
- "These can be found under the following supertables: app_results, "
343
- "metrics, errors, and predictions.",
344
- project=self.project,
345
- error=mlrun.errors.err_to_str(e),
346
- )
347
-
348
- # Prepare the drop statements
349
- drop_statements = []
350
- for subtable in subtables:
351
- drop_statements.append(
352
- self.tables[table].drop_subtable_query(subtable=subtable)
353
- )
354
- try:
355
- self.connection.run(
356
- statements=drop_statements,
357
- )
358
- except Exception as e:
359
- logger.warning(
360
- "Failed to delete model endpoint resources. You may need to delete them manually. "
361
- "These can be found under the following supertables: app_results, "
362
- "metrics, errors, and predictions.",
363
- project=self.project,
364
- error=mlrun.errors.err_to_str(e),
365
- )
366
- logger.debug(
367
- "Deleted all model endpoint resources using the TDEngine connector",
368
- project=self.project,
369
- number_of_endpoints_to_delete=len(endpoint_ids),
370
- )
371
-
372
- def delete_tsdb_resources(self):
373
- """
374
- Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
375
- """
376
- logger.debug(
377
- "Deleting all project resources using the TDEngine connector",
378
- project=self.project,
379
- )
380
- drop_statements = []
381
- for table in self.tables:
382
- drop_statements.append(self.tables[table].drop_supertable_query())
383
-
384
- try:
385
- self.connection.run(
386
- statements=drop_statements,
387
- )
388
- except Exception as e:
389
- logger.warning(
390
- "Failed to drop TDEngine tables. You may need to drop them manually. "
391
- "These can be found under the following supertables: app_results, "
392
- "metrics, errors, and predictions.",
393
- project=self.project,
394
- error=mlrun.errors.err_to_str(e),
395
- )
396
- logger.debug(
397
- "Deleted all project resources using the TDEngine connector",
398
- project=self.project,
399
- )
400
-
401
- # Check if database is empty and if so, drop it
402
- self._drop_database_if_empty()
403
-
404
- def _drop_database_if_empty(self):
405
- query_random_table_name = self._get_table_name_query()
406
- drop_database = False
407
- try:
408
- table_name = self.connection.run(
409
- query=query_random_table_name,
410
- )
411
- if len(table_name.data) == 0:
412
- # no tables were found under the database
413
- drop_database = True
414
-
415
- except Exception as e:
416
- logger.warning(
417
- "Failed to query tables in the database. You may need to drop the database manually if it is empty.",
418
- project=self.project,
419
- error=mlrun.errors.err_to_str(e),
420
- )
421
-
422
- if drop_database:
423
- logger.debug(
424
- "Going to drop the TDEngine database",
425
- project=self.project,
426
- database=self.database,
427
- )
428
- drop_database_query = self._drop_database_query()
429
- try:
430
- self.connection.run(
431
- statements=drop_database_query,
432
- )
433
- logger.debug(
434
- "The TDEngine database has been successfully dropped",
435
- project=self.project,
436
- database=self.database,
437
- )
438
-
439
- except Exception as e:
440
- logger.warning(
441
- "Failed to drop the database. You may need to drop it manually if it is empty.",
442
- project=self.project,
443
- error=mlrun.errors.err_to_str(e),
444
- )
445
-
446
- def get_model_endpoint_real_time_metrics(
447
- self,
448
- endpoint_id: str,
449
- metrics: list[str],
450
- start: str,
451
- end: str,
452
- ) -> dict[str, list[tuple[str, float]]]:
453
- # Not implemented, use get_records() instead
454
- pass
455
-
456
- def _get_records(
457
- self,
458
- table: str,
459
- start: datetime,
460
- end: datetime,
461
- columns: Optional[list[str]] = None,
462
- filter_query: Optional[str] = None,
463
- interval: Optional[str] = None,
464
- agg_funcs: Optional[list] = None,
465
- limit: Optional[int] = None,
466
- sliding_window_step: Optional[str] = None,
467
- timestamp_column: str = mm_schemas.EventFieldType.TIME,
468
- group_by: Optional[Union[list[str], str]] = None,
469
- preform_agg_columns: Optional[list] = None,
470
- order_by: Optional[str] = None,
471
- desc: Optional[bool] = None,
472
- partition_by: Optional[str] = None,
473
- ) -> pd.DataFrame:
474
- """
475
- Get records from the TSDB data collection.
476
- :param table: Either a supertable or a subtable name.
477
- :param start: The start time of the metrics.
478
- :param end: The end time of the metrics.
479
- :param columns: Columns to include in the result.
480
- :param filter_query: Optional filter expression as a string. TDengine supports SQL-like syntax.
481
- :param interval: The interval to aggregate the data by. Note that if interval is provided,
482
- `agg_funcs` must be provided as well. Provided as a string in the format of '1m',
483
- '1h', etc.
484
- :param agg_funcs: The aggregation functions to apply on the columns. Note that if `agg_funcs` is
485
- provided, `interval` must be provided as well. Provided as a list of strings in
486
- the format of ['sum', 'avg', 'count', ...].
487
- :param limit: The maximum number of records to return.
488
- :param sliding_window_step: The time step for which the time window moves forward. Note that if
489
- `sliding_window_step` is provided, interval must be provided as well. Provided
490
- as a string in the format of '1m', '1h', etc.
491
- :param timestamp_column: The column name that holds the timestamp index.
492
- :param group_by: The column name to group by. Note that if `group_by` is provided, aggregation
493
- functions must be provided as well.
494
- :param preform_agg_columns: The columns to perform aggregation on.
495
- Note that all provided aggregation functions are applied to these columns.
496
- If not provided, the default behavior is to aggregate over all columns in `columns`;
497
- if an empty list is provided, no aggregation is performed.
498
- :param order_by: The column or alias to order the query results by.
499
- :param desc: Whether to sort the results in descending order.
500
- :param partition_by: The column to partition the results by. Note that if interval is provided,
501
- `agg_funcs` must be provided as well.
502
-
503
- :return: DataFrame with the provided attributes from the data collection.
504
- :raise: MLRunInvalidArgumentError if querying the provided table failed.
505
- """
506
-
507
- full_query = tdengine_schemas.TDEngineSchema._get_records_query(
508
- table=table,
509
- start=start,
510
- end=end,
511
- columns_to_filter=columns,
512
- filter_query=filter_query,
513
- interval=interval,
514
- limit=limit,
515
- agg_funcs=agg_funcs,
516
- sliding_window_step=sliding_window_step,
517
- timestamp_column=timestamp_column,
518
- database=self.database,
519
- group_by=group_by,
520
- preform_agg_funcs_columns=preform_agg_columns,
521
- order_by=order_by,
522
- desc=desc,
523
- partition_by=partition_by,
524
- )
525
- logger.debug("Querying TDEngine", query=full_query)
526
- try:
527
- query_result = self.connection.run(
528
- query=full_query,
529
- )
530
- except taosws.QueryError as e:
531
- raise mlrun.errors.MLRunInvalidArgumentError(
532
- f"Failed to query table {table} in database {self.database}, {str(e)}"
533
- )
534
-
535
- df_columns = [field.name for field in query_result.fields]
536
- return pd.DataFrame(query_result.data, columns=df_columns)
537
-
538
- def read_metrics_data(
539
- self,
540
- *,
541
- endpoint_id: str,
542
- start: datetime,
543
- end: datetime,
544
- metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
545
- type: Literal["metrics", "results"],
546
- with_result_extra_data: bool = False,
547
- ) -> Union[
548
- list[
549
- Union[
550
- mm_schemas.ModelEndpointMonitoringResultValues,
551
- mm_schemas.ModelEndpointMonitoringMetricNoData,
552
- ],
553
- ],
554
- list[
555
- Union[
556
- mm_schemas.ModelEndpointMonitoringMetricValues,
557
- mm_schemas.ModelEndpointMonitoringMetricNoData,
558
- ],
559
- ],
560
- ]:
561
- timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
562
- columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
563
- if type == "metrics":
564
- if with_result_extra_data:
565
- logger.warning(
566
- "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
567
- project=self.project,
568
- endpoint_id=endpoint_id,
569
- )
570
- table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
571
- name = mm_schemas.MetricData.METRIC_NAME
572
- columns += [name, mm_schemas.MetricData.METRIC_VALUE]
573
- df_handler = self.df_to_metrics_values
574
- elif type == "results":
575
- table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
576
- name = mm_schemas.ResultData.RESULT_NAME
577
- columns += [
578
- name,
579
- mm_schemas.ResultData.RESULT_VALUE,
580
- mm_schemas.ResultData.RESULT_STATUS,
581
- mm_schemas.ResultData.RESULT_KIND,
582
- ]
583
- if with_result_extra_data:
584
- columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
585
- df_handler = self.df_to_results_values
586
- else:
587
- raise mlrun.errors.MLRunInvalidArgumentError(
588
- f"Invalid type {type}, must be either 'metrics' or 'results'."
589
- )
590
-
591
- metrics_condition = " OR ".join(
592
- [
593
- f"({mm_schemas.WriterEvent.APPLICATION_NAME}='{metric.app}' AND {name}='{metric.name}')"
594
- for metric in metrics
595
- ]
596
- )
597
- filter_query = f"(endpoint_id='{endpoint_id}') AND ({metrics_condition})"
598
-
599
- df = self._get_records(
600
- table=table,
601
- start=start,
602
- end=end,
603
- filter_query=filter_query,
604
- timestamp_column=timestamp_column,
605
- columns=columns,
606
- )
607
-
608
- df[mm_schemas.WriterEvent.END_INFER_TIME] = pd.to_datetime(
609
- df[mm_schemas.WriterEvent.END_INFER_TIME]
610
- )
611
- df.set_index(mm_schemas.WriterEvent.END_INFER_TIME, inplace=True)
612
-
613
- logger.debug(
614
- "Converting a DataFrame to a list of metrics or results values",
615
- table=table,
616
- project=self.project,
617
- endpoint_id=endpoint_id,
618
- is_empty=df.empty,
619
- )
620
-
621
- if not with_result_extra_data and type == "results":
622
- # Set the extra data to an empty string if it's not requested
623
- df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
624
-
625
- return df_handler(df=df, metrics=metrics, project=self.project)
626
-
627
- def read_predictions(
628
- self,
629
- *,
630
- endpoint_id: str,
631
- start: datetime,
632
- end: datetime,
633
- aggregation_window: Optional[str] = None,
634
- agg_funcs: Optional[list] = None,
635
- limit: Optional[int] = None,
636
- ) -> Union[
637
- mm_schemas.ModelEndpointMonitoringMetricValues,
638
- mm_schemas.ModelEndpointMonitoringMetricNoData,
639
- ]:
640
- if (agg_funcs and not aggregation_window) or (
641
- aggregation_window and not agg_funcs
642
- ):
643
- raise mlrun.errors.MLRunInvalidArgumentError(
644
- "both or neither of `aggregation_window` and `agg_funcs` must be provided"
645
- )
646
- df = self._get_records(
647
- table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
648
- start=start,
649
- end=end,
650
- columns=[mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT],
651
- filter_query=f"endpoint_id='{endpoint_id}'",
652
- agg_funcs=agg_funcs,
653
- interval=aggregation_window,
654
- limit=limit,
655
- )
656
-
657
- full_name = get_invocations_fqn(self.project)
658
-
659
- if df.empty:
660
- return mm_schemas.ModelEndpointMonitoringMetricNoData(
661
- full_name=full_name,
662
- type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
663
- )
664
-
665
- if aggregation_window:
666
- # _wend column, which represents the end time of each window, will be used as the time index
667
- df["_wend"] = pd.to_datetime(df["_wend"])
668
- df.set_index("_wend", inplace=True)
669
-
670
- estimated_prediction_count = (
671
- f"{agg_funcs[0]}({mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT})"
672
- if agg_funcs
673
- else mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT
674
- )
675
-
676
- return mm_schemas.ModelEndpointMonitoringMetricValues(
677
- full_name=full_name,
678
- values=list(
679
- zip(
680
- df.index,
681
- df[estimated_prediction_count],
682
- )
683
- ), # pyright: ignore[reportArgumentType]
684
- )
685
-
686
- def get_last_request(
687
- self,
688
- endpoint_ids: Union[str, list[str]],
689
- start: Optional[datetime] = None,
690
- end: Optional[datetime] = None,
691
- ) -> pd.DataFrame:
692
- filter_query = self._generate_filter_query(
693
- filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
694
- filter_values=endpoint_ids,
695
- )
696
- start, end = get_start_end(start, end)
697
- df = self._get_records(
698
- table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
699
- start=start,
700
- end=end,
701
- columns=[
702
- mm_schemas.EventFieldType.ENDPOINT_ID,
703
- mm_schemas.EventFieldType.TIME,
704
- mm_schemas.EventFieldType.LATENCY,
705
- ],
706
- filter_query=filter_query,
707
- timestamp_column=mm_schemas.EventFieldType.TIME,
708
- agg_funcs=["last"],
709
- group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
710
- preform_agg_columns=[mm_schemas.EventFieldType.TIME],
711
- )
712
- if not df.empty:
713
- df.dropna(inplace=True)
714
- df.rename(
715
- columns={
716
- f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
717
- f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
718
- },
719
- inplace=True,
720
- )
721
- df[mm_schemas.EventFieldType.LAST_REQUEST] = pd.to_datetime(
722
- df[mm_schemas.EventFieldType.LAST_REQUEST],
723
- errors="coerce",
724
- format="ISO8601",
725
- utc=True,
726
- )
727
- return df
728
-
729
- def get_drift_status(
730
- self,
731
- endpoint_ids: Union[str, list[str]],
732
- start: Optional[datetime] = None,
733
- end: Optional[datetime] = None,
734
- get_raw: bool = False,
735
- ) -> pd.DataFrame:
736
- filter_query = self._generate_filter_query(
737
- filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
738
- filter_values=endpoint_ids,
739
- )
740
- start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
741
- start, end = get_start_end(start, end)
742
- df = self._get_records(
743
- table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
744
- start=start,
745
- end=end,
746
- columns=[
747
- mm_schemas.ResultData.RESULT_STATUS,
748
- mm_schemas.EventFieldType.ENDPOINT_ID,
749
- ],
750
- filter_query=filter_query,
751
- timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
752
- agg_funcs=["max"],
753
- group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
754
- preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
755
- )
756
- df.rename(
757
- columns={
758
- f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
759
- },
760
- inplace=True,
761
- )
762
- if not df.empty:
763
- df.dropna(inplace=True)
764
- return df
765
-
766
- def count_results_by_status(
767
- self,
768
- start: Optional[Union[datetime, str]] = None,
769
- end: Optional[Union[datetime, str]] = None,
770
- endpoint_ids: Optional[Union[str, list[str]]] = None,
771
- application_names: Optional[Union[str, list[str]]] = None,
772
- result_status_list: Optional[list[int]] = None,
773
- ) -> dict[tuple[str, int], int]:
774
- filter_query = ""
775
-
776
- start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
777
-
778
- if endpoint_ids:
779
- filter_query = self._generate_filter_query(
780
- filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
781
- filter_values=endpoint_ids,
782
- )
783
- if application_names:
784
- app_filter_query = self._generate_filter_query(
785
- filter_column=mm_schemas.ApplicationEvent.APPLICATION_NAME,
786
- filter_values=application_names,
787
- )
788
- if filter_query:
789
- filter_query += f" AND {app_filter_query}"
790
- else:
791
- filter_query = app_filter_query
792
- if result_status_list:
793
- status_filter_query = self._generate_filter_query(
794
- filter_column=mm_schemas.ResultData.RESULT_STATUS,
795
- filter_values=result_status_list,
796
- )
797
- if filter_query:
798
- filter_query += f" AND {status_filter_query}"
799
- else:
800
- filter_query = status_filter_query
801
-
802
- df = self._get_records(
803
- table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
804
- start=start,
805
- end=end,
806
- columns=[
807
- mm_schemas.WriterEvent.APPLICATION_NAME,
808
- mm_schemas.ResultData.RESULT_STATUS,
809
- mm_schemas.ResultData.RESULT_VALUE,
810
- ],
811
- filter_query=filter_query,
812
- timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
813
- group_by=[
814
- mm_schemas.WriterEvent.APPLICATION_NAME,
815
- mm_schemas.ResultData.RESULT_STATUS,
816
- ],
817
- agg_funcs=["count"],
818
- preform_agg_columns=[mm_schemas.ResultData.RESULT_VALUE],
819
- )
820
- if df.empty:
821
- return {}
822
-
823
- # Convert DataFrame to a dictionary
824
- return {
825
- (
826
- row[mm_schemas.WriterEvent.APPLICATION_NAME],
827
- row[mm_schemas.ResultData.RESULT_STATUS],
828
- ): row["count(result_value)"]
829
- for _, row in df.iterrows()
830
- }
831
-
832
- def count_processed_model_endpoints(
833
- self,
834
- start: Optional[Union[datetime, str]] = None,
835
- end: Optional[Union[datetime, str]] = None,
836
- application_names: Optional[Union[str, list[str]]] = None,
837
- ) -> dict:
838
- filter_query = ""
839
- start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
840
-
841
- if application_names:
842
- filter_query = self._generate_filter_query(
843
- filter_column=mm_schemas.WriterEvent.APPLICATION_NAME,
844
- filter_values=application_names,
845
- )
846
-
847
- def get_application_endpoints_records(super_table: str) -> pd.DataFrame:
848
- return self._get_records(
849
- table=super_table,
850
- start=start,
851
- end=end,
852
- timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
853
- columns=[
854
- mm_schemas.WriterEvent.APPLICATION_NAME,
855
- mm_schemas.EventFieldType.ENDPOINT_ID,
856
- ],
857
- filter_query=filter_query,
858
- group_by=[
859
- mm_schemas.WriterEvent.APPLICATION_NAME,
860
- mm_schemas.EventFieldType.ENDPOINT_ID,
861
- ],
862
- preform_agg_columns=[mm_schemas.ResultData.RESULT_VALUE],
863
- agg_funcs=["last"],
864
- )
865
-
866
- df_results = get_application_endpoints_records(
867
- super_table=self.tables[
868
- mm_schemas.TDEngineSuperTables.APP_RESULTS
869
- ].super_table
870
- )
871
- df_metrics = get_application_endpoints_records(
872
- super_table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
873
- )
874
-
875
- combined_df = pd.concat([df_results, df_metrics]).drop_duplicates()
876
-
877
- if combined_df.empty:
878
- return {}
879
- grouped_df = combined_df.groupby(
880
- mm_schemas.WriterEvent.APPLICATION_NAME
881
- ).count()
882
-
883
- # Convert DataFrame to a dictionary
884
- return grouped_df[mm_schemas.WriterEvent.ENDPOINT_ID].to_dict()
885
-
886
- def calculate_latest_metrics(
887
- self,
888
- start: Optional[Union[datetime, str]] = None,
889
- end: Optional[Union[datetime, str]] = None,
890
- application_names: Optional[Union[str, list[str]]] = None,
891
- ) -> list[
892
- Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
893
- ]:
894
- metric_list = []
895
- filter_query = ""
896
- start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
897
-
898
- if application_names:
899
- filter_query = self._generate_filter_query(
900
- filter_column=mm_schemas.WriterEvent.APPLICATION_NAME,
901
- filter_values=application_names,
902
- )
903
-
904
- def get_latest_metrics_records(
905
- record_type: Literal["metrics", "results"],
906
- ) -> pd.DataFrame:
907
- columns = [
908
- mm_schemas.WriterEvent.END_INFER_TIME,
909
- mm_schemas.WriterEvent.APPLICATION_NAME,
910
- ]
911
- if record_type == "results":
912
- table = self.tables[
913
- mm_schemas.TDEngineSuperTables.APP_RESULTS
914
- ].super_table
915
- columns += [
916
- mm_schemas.ResultData.RESULT_NAME,
917
- mm_schemas.ResultData.RESULT_VALUE,
918
- mm_schemas.ResultData.RESULT_STATUS,
919
- mm_schemas.ResultData.RESULT_KIND,
920
- ]
921
- agg_column = mm_schemas.ResultData.RESULT_VALUE
922
- else:
923
- table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
924
- columns += [
925
- mm_schemas.MetricData.METRIC_NAME,
926
- mm_schemas.MetricData.METRIC_VALUE,
927
- ]
928
- agg_column = mm_schemas.MetricData.METRIC_VALUE
929
-
930
- return self._get_records(
931
- table=table,
932
- start=start,
933
- end=end,
934
- columns=columns,
935
- filter_query=filter_query,
936
- timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
937
- # Aggregate per application/metric pair regardless of timestamp
938
- group_by=columns[1:],
939
- preform_agg_columns=[agg_column],
940
- agg_funcs=["last"],
941
- )
942
-
943
- df_results = get_latest_metrics_records(record_type="results")
944
- df_metrics = get_latest_metrics_records(record_type="metrics")
945
-
946
- if df_results.empty and df_metrics.empty:
947
- return metric_list
948
-
949
- def build_metric_objects() -> (
950
- list[
951
- Union[
952
- mm_schemas.ApplicationResultRecord,
953
- mm_schemas.ApplicationMetricRecord,
954
- ]
955
- ]
956
- ):
957
- metric_objects = []
958
-
959
- if not df_results.empty:
960
- df_results.rename(
961
- columns={
962
- f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
963
- },
964
- inplace=True,
965
- )
966
- for _, row in df_results.iterrows():
967
- metric_objects.append(
968
- mm_schemas.ApplicationResultRecord(
969
- time=datetime.fromisoformat(
970
- row[mm_schemas.WriterEvent.END_INFER_TIME]
971
- ),
972
- result_name=row[mm_schemas.ResultData.RESULT_NAME],
973
- kind=row[mm_schemas.ResultData.RESULT_KIND],
974
- status=row[mm_schemas.ResultData.RESULT_STATUS],
975
- value=row[mm_schemas.ResultData.RESULT_VALUE],
976
- )
977
- )
978
-
979
- if not df_metrics.empty:
980
- df_metrics.rename(
981
- columns={
982
- f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
983
- },
984
- inplace=True,
985
- )
986
- for _, row in df_metrics.iterrows():
987
- metric_objects.append(
988
- mm_schemas.ApplicationMetricRecord(
989
- time=datetime.fromisoformat(
990
- row[mm_schemas.WriterEvent.END_INFER_TIME]
991
- ),
992
- metric_name=row[mm_schemas.MetricData.METRIC_NAME],
993
- value=row[mm_schemas.MetricData.METRIC_VALUE],
994
- )
995
- )
996
-
997
- return metric_objects
998
-
999
- return build_metric_objects()
1000
-
1001
- def get_metrics_metadata(
1002
- self,
1003
- endpoint_id: Union[str, list[str]],
1004
- start: Optional[datetime] = None,
1005
- end: Optional[datetime] = None,
1006
- ) -> pd.DataFrame:
1007
- start, end = get_start_end(start, end)
1008
- df = self._get_records(
1009
- table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
1010
- start=start,
1011
- end=end,
1012
- columns=[
1013
- mm_schemas.ApplicationEvent.APPLICATION_NAME,
1014
- mm_schemas.MetricData.METRIC_NAME,
1015
- mm_schemas.EventFieldType.ENDPOINT_ID,
1016
- ],
1017
- filter_query=self._generate_filter_query(
1018
- filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
1019
- filter_values=endpoint_id,
1020
- ),
1021
- timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
1022
- group_by=[
1023
- mm_schemas.WriterEvent.APPLICATION_NAME,
1024
- mm_schemas.MetricData.METRIC_NAME,
1025
- mm_schemas.EventFieldType.ENDPOINT_ID,
1026
- ],
1027
- agg_funcs=["last"],
1028
- )
1029
- df.rename(
1030
- columns={
1031
- f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
1032
- f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
1033
- f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
1034
- },
1035
- inplace=True,
1036
- )
1037
- if not df.empty:
1038
- df.dropna(inplace=True)
1039
- return df
1040
-
1041
- def get_results_metadata(
1042
- self,
1043
- endpoint_id: Union[str, list[str]],
1044
- start: Optional[datetime] = None,
1045
- end: Optional[datetime] = None,
1046
- ) -> pd.DataFrame:
1047
- start, end = get_start_end(start, end)
1048
- df = self._get_records(
1049
- table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
1050
- start=start,
1051
- end=end,
1052
- columns=[
1053
- mm_schemas.ApplicationEvent.APPLICATION_NAME,
1054
- mm_schemas.ResultData.RESULT_NAME,
1055
- mm_schemas.ResultData.RESULT_KIND,
1056
- mm_schemas.EventFieldType.ENDPOINT_ID,
1057
- ],
1058
- filter_query=self._generate_filter_query(
1059
- filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
1060
- filter_values=endpoint_id,
1061
- ),
1062
- timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
1063
- group_by=[
1064
- mm_schemas.WriterEvent.APPLICATION_NAME,
1065
- mm_schemas.ResultData.RESULT_NAME,
1066
- mm_schemas.EventFieldType.ENDPOINT_ID,
1067
- ],
1068
- agg_funcs=["last"],
1069
- )
1070
- df.rename(
1071
- columns={
1072
- f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
1073
- f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
1074
- f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
1075
- f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
1076
- },
1077
- inplace=True,
1078
- )
1079
- if not df.empty:
1080
- df.dropna(inplace=True)
1081
- return df
1082
-
1083
- def get_error_count(
1084
- self,
1085
- endpoint_ids: Union[str, list[str]],
1086
- start: Optional[datetime] = None,
1087
- end: Optional[datetime] = None,
1088
- get_raw: bool = False,
1089
- ) -> pd.DataFrame:
1090
- filter_query = self._generate_filter_query(
1091
- filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
1092
- filter_values=endpoint_ids,
1093
- )
1094
- filter_query += f" AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'"
1095
- start, end = get_start_end(start, end)
1096
- df = self._get_records(
1097
- table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
1098
- start=start,
1099
- end=end,
1100
- columns=[
1101
- mm_schemas.EventFieldType.MODEL_ERROR,
1102
- mm_schemas.EventFieldType.ENDPOINT_ID,
1103
- ],
1104
- agg_funcs=["count"],
1105
- filter_query=filter_query,
1106
- group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
1107
- preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
1108
- )
1109
- df.rename(
1110
- columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
1111
- inplace=True,
1112
- )
1113
- if not df.empty:
1114
- df.dropna(inplace=True)
1115
- return df
1116
-
1117
- def get_avg_latency(
1118
- self,
1119
- endpoint_ids: Union[str, list[str]],
1120
- start: Optional[datetime] = None,
1121
- end: Optional[datetime] = None,
1122
- get_raw: bool = False,
1123
- ) -> pd.DataFrame:
1124
- endpoint_ids = (
1125
- endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
1126
- )
1127
- start, end = get_start_end(start, end, delta=timedelta(hours=24))
1128
- df = self._get_records(
1129
- table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
1130
- start=start,
1131
- end=end,
1132
- columns=[
1133
- mm_schemas.EventFieldType.LATENCY,
1134
- mm_schemas.EventFieldType.ENDPOINT_ID,
1135
- ],
1136
- agg_funcs=["avg"],
1137
- filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
1138
- group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
1139
- preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
1140
- )
1141
- df.rename(
1142
- columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
1143
- inplace=True,
1144
- )
1145
- if not df.empty:
1146
- df.dropna(inplace=True)
1147
- return df
1148
-
1149
- async def add_basic_metrics(
1150
- self,
1151
- model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
1152
- project: str,
1153
- run_in_threadpool: Callable,
1154
- metric_list: Optional[list[str]] = None,
1155
- ) -> list[mlrun.common.schemas.ModelEndpoint]:
1156
- """
1157
- Add basic metrics to the model endpoint object.
1158
-
1159
- :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
1160
- be filled with the relevant basic metrics.
1161
- :param project: The name of the project.
1162
- :param run_in_threadpool: A function that runs another function in a thread pool.
1163
- :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
1164
-
1165
- :return: A list of `ModelEndpoint` objects enriched with the requested basic metrics.
1166
- """
1167
-
1168
- uids = [mep.metadata.uid for mep in model_endpoint_objects]
1169
-
1170
- metric_name_to_function = {
1171
- "error_count": self.get_error_count,
1172
- "last_request": self.get_last_request,
1173
- "avg_latency": self.get_avg_latency,
1174
- "result_status": self.get_drift_status,
1175
- }
1176
- if metric_list is not None:
1177
- for metric_name in list(metric_name_to_function):
1178
- if metric_name not in metric_list:
1179
- del metric_name_to_function[metric_name]
1180
-
1181
- metric_name_to_df = {
1182
- metric_name: function(endpoint_ids=uids)
1183
- for metric_name, function in metric_name_to_function.items()
1184
- }
1185
-
1186
- def add_metrics(
1187
- mep: mlrun.common.schemas.ModelEndpoint,
1188
- df_dictionary: dict[str, pd.DataFrame],
1189
- ):
1190
- for metric in df_dictionary.keys():
1191
- df = df_dictionary.get(metric, pd.DataFrame())
1192
- if not df.empty:
1193
- line = df[df["endpoint_id"] == mep.metadata.uid]
1194
- if not line.empty and metric in line:
1195
- value = line[metric].item()
1196
- if isinstance(value, pd.Timestamp):
1197
- value = value.to_pydatetime()
1198
- setattr(mep.status, metric, value)
1199
-
1200
- return mep
1201
-
1202
- return list(
1203
- map(
1204
- lambda mep: add_metrics(
1205
- mep=mep,
1206
- df_dictionary=metric_name_to_df,
1207
- ),
1208
- model_endpoint_objects,
1209
- )
1210
- )
1211
-
1212
- def get_drift_data(
1213
- self,
1214
- start: datetime,
1215
- end: datetime,
1216
- ) -> mm_schemas.ModelEndpointDriftValues:
1217
- filter_query = self._generate_filter_query(
1218
- filter_column=mm_schemas.ResultData.RESULT_STATUS,
1219
- filter_values=[
1220
- mm_schemas.ResultStatusApp.potential_detection.value,
1221
- mm_schemas.ResultStatusApp.detected.value,
1222
- ],
1223
- )
1224
- table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
1225
- start, end, interval = self._prepare_aligned_start_end(start, end)
1226
-
1227
- # get per time-interval x endpoint_id combination the max result status
1228
- df = self._get_records(
1229
- table=table,
1230
- start=start,
1231
- end=end,
1232
- interval=interval,
1233
- columns=[mm_schemas.ResultData.RESULT_STATUS],
1234
- filter_query=filter_query,
1235
- timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
1236
- agg_funcs=["max"],
1237
- partition_by=mm_schemas.WriterEvent.ENDPOINT_ID,
1238
- )
1239
- if df.empty:
1240
- return mm_schemas.ModelEndpointDriftValues(values=[])
1241
-
1242
- df["_wstart"] = pd.to_datetime(df["_wstart"])
1243
- return self._df_to_drift_data(df)
1244
-
1245
- # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
1246
- #
1247
- # def read_prediction_metric_for_endpoint_if_exists(
1248
- # self, endpoint_id: str
1249
- # ) -> Optional[mm_schemas.ModelEndpointMonitoringMetric]:
1250
- # """
1251
- # Read the "invocations" metric for the provided model endpoint, and return the metric object
1252
- # if it exists.
1253
- #
1254
- # :param endpoint_id: The model endpoint identifier.
1255
- # :return: `None` if the invocations metric does not exist, otherwise return the
1256
- # corresponding metric object.
1257
- # """
1258
- # # Read just one record, because we just want to check if there is any data for this endpoint_id
1259
- # predictions = self.read_predictions(
1260
- # endpoint_id=endpoint_id,
1261
- # start=datetime.min,
1262
- # end=mlrun.utils.now_date(),
1263
- # limit=1,
1264
- # )
1265
- # if predictions:
1266
- # return get_invocations_metric(self.project)