mlrun 1.10.0rc16__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (98)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/document.py +6 -1
  3. mlrun/artifacts/llm_prompt.py +21 -15
  4. mlrun/artifacts/model.py +3 -3
  5. mlrun/common/constants.py +9 -0
  6. mlrun/common/formatters/artifact.py +1 -0
  7. mlrun/common/model_monitoring/helpers.py +86 -0
  8. mlrun/common/schemas/__init__.py +2 -0
  9. mlrun/common/schemas/auth.py +2 -0
  10. mlrun/common/schemas/function.py +10 -0
  11. mlrun/common/schemas/hub.py +30 -18
  12. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  13. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  14. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  15. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  16. mlrun/common/schemas/pipeline.py +1 -1
  17. mlrun/common/schemas/serving.py +3 -0
  18. mlrun/common/schemas/workflow.py +1 -0
  19. mlrun/common/secrets.py +22 -1
  20. mlrun/config.py +32 -10
  21. mlrun/datastore/__init__.py +11 -3
  22. mlrun/datastore/azure_blob.py +162 -47
  23. mlrun/datastore/datastore.py +9 -4
  24. mlrun/datastore/datastore_profile.py +61 -5
  25. mlrun/datastore/model_provider/huggingface_provider.py +363 -0
  26. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  27. mlrun/datastore/model_provider/model_provider.py +211 -74
  28. mlrun/datastore/model_provider/openai_provider.py +243 -71
  29. mlrun/datastore/s3.py +24 -2
  30. mlrun/datastore/storeytargets.py +2 -3
  31. mlrun/datastore/utils.py +15 -3
  32. mlrun/db/base.py +27 -19
  33. mlrun/db/httpdb.py +57 -48
  34. mlrun/db/nopdb.py +25 -10
  35. mlrun/execution.py +55 -13
  36. mlrun/hub/__init__.py +15 -0
  37. mlrun/hub/module.py +181 -0
  38. mlrun/k8s_utils.py +105 -16
  39. mlrun/launcher/base.py +13 -6
  40. mlrun/launcher/local.py +2 -0
  41. mlrun/model.py +9 -3
  42. mlrun/model_monitoring/api.py +66 -27
  43. mlrun/model_monitoring/applications/__init__.py +1 -1
  44. mlrun/model_monitoring/applications/base.py +372 -136
  45. mlrun/model_monitoring/applications/context.py +2 -4
  46. mlrun/model_monitoring/applications/results.py +4 -7
  47. mlrun/model_monitoring/controller.py +239 -101
  48. mlrun/model_monitoring/db/_schedules.py +36 -13
  49. mlrun/model_monitoring/db/_stats.py +4 -3
  50. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  51. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
  52. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
  53. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  54. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  55. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
  56. mlrun/model_monitoring/helpers.py +28 -5
  57. mlrun/model_monitoring/stream_processing.py +45 -14
  58. mlrun/model_monitoring/writer.py +220 -1
  59. mlrun/platforms/__init__.py +3 -2
  60. mlrun/platforms/iguazio.py +7 -3
  61. mlrun/projects/operations.py +6 -1
  62. mlrun/projects/pipelines.py +2 -2
  63. mlrun/projects/project.py +128 -45
  64. mlrun/run.py +94 -17
  65. mlrun/runtimes/__init__.py +18 -0
  66. mlrun/runtimes/base.py +14 -6
  67. mlrun/runtimes/daskjob.py +1 -0
  68. mlrun/runtimes/local.py +5 -2
  69. mlrun/runtimes/mounts.py +20 -2
  70. mlrun/runtimes/nuclio/__init__.py +1 -0
  71. mlrun/runtimes/nuclio/application/application.py +147 -17
  72. mlrun/runtimes/nuclio/function.py +70 -27
  73. mlrun/runtimes/nuclio/serving.py +85 -4
  74. mlrun/runtimes/pod.py +213 -21
  75. mlrun/runtimes/utils.py +49 -9
  76. mlrun/secrets.py +54 -13
  77. mlrun/serving/remote.py +79 -6
  78. mlrun/serving/routers.py +23 -41
  79. mlrun/serving/server.py +211 -40
  80. mlrun/serving/states.py +536 -156
  81. mlrun/serving/steps.py +62 -0
  82. mlrun/serving/system_steps.py +136 -81
  83. mlrun/serving/v2_serving.py +9 -10
  84. mlrun/utils/helpers.py +212 -82
  85. mlrun/utils/logger.py +3 -1
  86. mlrun/utils/notifications/notification/base.py +18 -0
  87. mlrun/utils/notifications/notification/git.py +2 -4
  88. mlrun/utils/notifications/notification/slack.py +2 -4
  89. mlrun/utils/notifications/notification/webhook.py +2 -5
  90. mlrun/utils/notifications/notification_pusher.py +1 -1
  91. mlrun/utils/version/version.json +2 -2
  92. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +44 -45
  93. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +97 -92
  94. mlrun/api/schemas/__init__.py +0 -259
  95. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
  96. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
  97. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
  98. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/db/_schedules.py

@@ -16,7 +16,7 @@ import json
 import sys
 from abc import ABC, abstractmethod
 from contextlib import AbstractContextManager
-from datetime import datetime, timezone
+from datetime import datetime
 from types import TracebackType
 from typing import TYPE_CHECKING, Final, Optional
 
@@ -162,19 +162,29 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
             endpoint_id=model_endpoint.metadata.uid,
         )
 
-    def get_application_time(self, application: str) -> Optional[int]:
+    def get_application_time(self, application: str) -> Optional[float]:
         self._check_open_schedules()
         return self._schedules.get(application)
 
-    def update_application_time(self, application: str, timestamp: int) -> None:
+    def update_application_time(self, application: str, timestamp: float) -> None:
         self._check_open_schedules()
-        self._schedules[application] = timestamp
+        self._schedules[application] = float(timestamp)
+
+    def delete_application_time(self, application: str) -> None:
+        self._check_open_schedules()
+        if application in self._schedules:
+            logger.debug(
+                "Deleting application time from schedules",
+                application=application,
+                endpoint_id=self._endpoint_id,
+            )
+            del self._schedules[application]
 
     def get_application_list(self) -> set[str]:
         self._check_open_schedules()
         return set(self._schedules.keys())
 
-    def get_min_timestamp(self) -> Optional[int]:
+    def get_min_timestamp(self) -> Optional[float]:
         self._check_open_schedules()
         return min(self._schedules.values(), default=None)
 
@@ -198,7 +208,7 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
             project=self._project
         )
 
-    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[int]:
+    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
@@ -208,15 +218,19 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         return None
 
     def update_endpoint_timestamps(
-        self, endpoint_uid: str, last_request: int, last_analyzed: int
+        self, endpoint_uid: str, last_request: float, last_analyzed: float
     ) -> None:
         self._check_open_schedules()
         self._schedules[endpoint_uid] = {
-            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST: last_request,
-            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_ANALYZED: last_analyzed,
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST: float(
+                last_request
+            ),
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_ANALYZED: float(
+                last_analyzed
+            ),
         }
 
-    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[int]:
+    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
@@ -267,9 +281,18 @@ class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
         self, endpoint_uid: str, last_analyzed: datetime
     ) -> None:
         self._check_open_schedules()
-        self._schedules[endpoint_uid] = last_analyzed.astimezone(
-            timezone.utc
-        ).isoformat()
+        self._schedules[endpoint_uid] = last_analyzed.isoformat()
+
+    def delete_endpoints_last_analyzed(self, endpoint_uids: list[str]) -> None:
+        self._check_open_schedules()
+        for endpoint_uid in endpoint_uids:
+            if endpoint_uid in self._schedules:
+                logger.debug(
+                    "Deleting endpoint last analyzed from schedules",
+                    endpoint_uid=endpoint_uid,
+                    application=self._application,
+                )
+                del self._schedules[endpoint_uid]
 
 
 def _delete_folder(folder: str) -> None:
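
For orientation (not part of the diff), here is a minimal standalone sketch of the bookkeeping these schedule methods now perform; a plain dict stands in for the real schedules file, and the names are illustrative only:

    import time

    # Hypothetical stand-in for the per-endpoint schedules mapping kept by
    # ModelMonitoringSchedulesFileEndpoint: application name -> last run time.
    schedules: dict[str, float] = {}

    def update_application_time(application: str, timestamp: float) -> None:
        # Timestamps are now stored as floats, so fractional epoch seconds survive round-tripping.
        schedules[application] = float(timestamp)

    def delete_application_time(application: str) -> None:
        # Mirrors the new delete method: drop the entry if it exists, otherwise do nothing.
        schedules.pop(application, None)

    update_application_time("histogram-data-drift", time.time())
    update_application_time("my-app", time.time() - 30.5)
    print(min(schedules.values(), default=None))  # behaves like get_min_timestamp()
    delete_application_time("my-app")
    print(min(schedules.values(), default=None))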

mlrun/model_monitoring/db/_stats.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 import abc
 import json
+import typing
 from abc import abstractmethod
 from datetime import datetime, timezone
 from typing import cast
@@ -73,7 +74,7 @@ class ModelMonitoringStatsFile(abc.ABC):
             path=self._item.url,
         )
 
-    def read(self) -> tuple[dict, datetime]:
+    def read(self) -> tuple[dict, typing.Optional[datetime]]:
         """
         Read the stats data and timestamp saved in file
         :return: tuple[dict, str] dictionary with stats data and timestamp saved in file
@@ -99,13 +100,13 @@
             ):
                 raise
 
-            logger.exception(
+            logger.warning(
                 "The Stats file was not found. It should have been created "
                 "as a part of the model endpoint's creation",
                 path=self._path,
                 error=err,
            )
-            raise
+            return {}, None
 
     def write(self, stats: dict, timestamp: datetime) -> None:
         """

mlrun/model_monitoring/db/tsdb/base.py

@@ -14,7 +14,7 @@
 
 from abc import ABC, abstractmethod
 from datetime import datetime, timedelta
-from typing import Callable, ClassVar, Literal, Optional, Union
+from typing import ClassVar, Literal, Optional, Union
 
 import pandas as pd
 import pydantic.v1
@@ -60,6 +60,16 @@ class TSDBConnector(ABC):
         """
         pass
 
+    def apply_writer_steps(self, graph, after, **kwargs) -> None:
+        """
+        Apply TSDB steps on the provided writer graph. Throughout these steps, the graph stores metrics / results.
+        This data is being used by mlrun UI and the monitoring dashboards in grafana.
+        There are 2 different key metric dictionaries that are being generated throughout these steps:
+        - metrics (user-defined metrics) - model monitoring application metrics
+        - results (user-defined results) - model monitoring application results
+        """
+        pass
+
     @abstractmethod
     def handle_model_error(self, graph, **kwargs) -> None:
         """
@@ -96,14 +106,23 @@
         """
 
     @abstractmethod
-    def delete_tsdb_records(
-        self,
-        endpoint_ids: list[str],
-    ) -> None:
+    def delete_tsdb_records(self, endpoint_ids: list[str]) -> None:
         """
         Delete model endpoint records from the TSDB connector.
+
         :param endpoint_ids: List of model endpoint unique identifiers.
-        :param delete_timeout: The timeout in seconds to wait for the deletion to complete.
+        """
+        pass
+
+    @abstractmethod
+    def delete_application_records(
+        self, application_name: str, endpoint_ids: Optional[list[str]] = None
+    ) -> None:
+        """
+        Delete application records from the TSDB for the given model endpoints or all if ``None``.
+
+        :param application_name: The name of the application to delete records for.
+        :param endpoint_ids: List of model endpoint unique identifiers.
         """
         pass
 
@@ -425,11 +444,9 @@
         ]
         """
 
-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         raise NotImplementedError()
@@ -774,3 +791,6 @@
             )
         )
         return mm_schemas.ModelEndpointDriftValues(values=values)
+
+    def add_pre_writer_steps(self, graph, after):
+        return None
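
To show the shape of the revised connector contract only, here is a simplified standalone sketch (not mlrun's actual classes): the base class mixes required abstract methods with optional no-op hooks that concrete connectors may override.

    from abc import ABC, abstractmethod
    from typing import Optional

    class MiniTSDBConnector(ABC):
        """Simplified stand-in for TSDBConnector, to illustrate the new hook layout only."""

        # Optional hooks with no-op defaults: connectors that need writer-graph steps
        # (such as the TDEngine connector below) override these, others inherit the defaults.
        def add_pre_writer_steps(self, graph, after):
            return None

        def apply_writer_steps(self, graph, after, **kwargs) -> None:
            pass

        # New required capability: delete one application's records, optionally per endpoint.
        @abstractmethod
        def delete_application_records(
            self, application_name: str, endpoint_ids: Optional[list[str]] = None
        ) -> None: ...

    class InMemoryConnector(MiniTSDBConnector):
        def __init__(self):
            self.records = {("my-app", "ep1"): [1.0], ("other-app", "ep1"): [2.0]}

        def delete_application_records(self, application_name, endpoint_ids=None):
            for app, ep in list(self.records):
                if app == application_name and (endpoint_ids is None or ep in endpoint_ids):
                    del self.records[(app, ep)]

    conn = InMemoryConnector()
    conn.delete_application_records("my-app")
    print(conn.records)  # only the "other-app" records remain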

mlrun/model_monitoring/db/tsdb/tdengine/schemas.py

@@ -122,10 +122,7 @@ class TDEngineSchema:
         )
         return f"DELETE FROM {self.database}.{subtable} WHERE {values};"
 
-    def drop_subtable_query(
-        self,
-        subtable: str,
-    ) -> str:
+    def drop_subtable_query(self, subtable: str) -> str:
         return f"DROP TABLE if EXISTS {self.database}.`{subtable}`;"
 
     def drop_supertable_query(self) -> str:
@@ -145,8 +142,10 @@
         values = f" {operator} ".join(
             f"{filter_tag} LIKE '{val}'" for val in filter_values
         )
+        return self._get_tables_query_by_condition(values)
 
-        return f"SELECT DISTINCT tbname FROM {self.database}.{self.super_table} WHERE {values};"
+    def _get_tables_query_by_condition(self, condition: str) -> str:
+        return f"SELECT DISTINCT TBNAME FROM {self.database}.{self.super_table} WHERE {condition};"
 
     @staticmethod
     def _get_records_query(
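
A quick illustration (standalone, with made-up database and supertable names) of the SQL that the extracted helper builds for an arbitrary condition:

    def get_tables_query_by_condition(database: str, super_table: str, condition: str) -> str:
        # Mirrors the extracted helper: list the subtables of a supertable that match a WHERE clause.
        return f"SELECT DISTINCT TBNAME FROM {database}.{super_table} WHERE {condition};"

    print(
        get_tables_query_by_condition(
            "mlrun_model_monitoring_1234",  # hypothetical database name
            "app_results",                  # hypothetical supertable
            "application_name='my-app' AND endpoint_id IN ('ep1', 'ep2')",
        )
    )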

mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -14,7 +14,7 @@
 
 import threading
 from datetime import datetime, timedelta
-from typing import Callable, Final, Literal, Optional, Union
+from typing import Final, Literal, Optional, Union
 
 import pandas as pd
 import taosws
@@ -22,7 +22,7 @@ import taosws
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.common.types
 import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
-import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
+from mlrun.config import config
 from mlrun.datastore.datastore_profile import DatastoreProfile
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connection import (
@@ -55,14 +55,12 @@ class TDEngineConnector(TSDBConnector):
     """
 
     type: str = mm_schemas.TSDBTarget.TDEngine
-    database = f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
 
     def __init__(
         self,
         project: str,
         profile: DatastoreProfile,
         timestamp_precision: TDEngineTimestampPrecision = TDEngineTimestampPrecision.MICROSECOND,
-        **kwargs,
     ):
         super().__init__(project=project)
@@ -72,6 +70,15 @@
             timestamp_precision
         )
 
+        if not mlrun.mlconf.system_id:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "system_id is not set in mlrun.mlconf. "
+                "TDEngineConnector requires system_id to be configured for database name construction. "
+                "Please ensure MLRun configuration is properly loaded before creating TDEngineConnector."
+            )
+        self.database = (
+            f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
+        )
         self._init_super_tables()
 
     @property
@@ -205,7 +212,7 @@
     @staticmethod
     def _generate_filter_query(
         filter_column: str, filter_values: Union[str, list[Union[str, int]]]
-    ) -> Optional[str]:
+    ) -> str:
         """
         Generate a filter query for TDEngine based on the provided column and values.
 
@@ -213,15 +220,14 @@
         :param filter_values: A single value or a list of values to filter by.
 
         :return: A string representing the filter query.
-        :raise: MLRunInvalidArgumentError if the filter values are not of type string or list.
+        :raise: ``MLRunValueError`` if the filter values are not of type string or list.
         """
-
         if isinstance(filter_values, str):
            return f"{filter_column}='{filter_values}'"
         elif isinstance(filter_values, list):
            return f"{filter_column} IN ({', '.join(repr(v) for v in filter_values)}) "
         else:
-            raise mlrun.errors.MLRunInvalidArgumentError(
+            raise mlrun.errors.MLRunValueError(
                 f"Invalid filter values {filter_values}: must be a string or a list, "
                 f"got {type(filter_values).__name__}; filter values: {filter_values}"
             )
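
For reference, a standalone re-implementation of the filter helper's two accepted shapes (single string and list); only the real code raises the new MLRunValueError type:

    from typing import Union

    def generate_filter_query(filter_column: str, filter_values: Union[str, list]) -> str:
        # Single value -> equality, list -> IN clause, anything else is rejected.
        if isinstance(filter_values, str):
            return f"{filter_column}='{filter_values}'"
        if isinstance(filter_values, list):
            return f"{filter_column} IN ({', '.join(repr(v) for v in filter_values)}) "
        raise ValueError(f"Invalid filter values {filter_values!r}")

    print(generate_filter_query("application_name", "my-app"))
    print(generate_filter_query("endpoint_id", ["ep1", "ep2"]))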
@@ -279,6 +285,65 @@
             after="ProcessBeforeTDEngine",
         )
 
+    def add_pre_writer_steps(self, graph, after):
+        return graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.writer_graph_steps.ProcessBeforeTDEngine",
+            name="ProcessBeforeTDEngine",
+            after=after,
+        )
+
+    def apply_writer_steps(self, graph, after, **kwargs) -> None:
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
+            name="tsdb_metrics",
+            after=after,
+            url=f"ds://{self._tdengine_connection_profile.name}",
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            database=self.database,
+            graph_shape="cylinder",
+            columns=[
+                mm_schemas.WriterEvent.START_INFER_TIME,
+                mm_schemas.MetricData.METRIC_VALUE,
+            ],
+            tag_cols=[
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+        )
+
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
+            name="tsdb_app_results",
+            after=after,
+            url=f"ds://{self._tdengine_connection_profile.name}",
+            supertable=self.tables[
+                mm_schemas.TDEngineSuperTables.APP_RESULTS
+            ].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            database=self.database,
+            graph_shape="cylinder",
+            columns=[
+                mm_schemas.WriterEvent.START_INFER_TIME,
+                mm_schemas.ResultData.RESULT_VALUE,
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.ResultData.RESULT_EXTRA_DATA,
+            ],
+            tag_cols=[
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+        )
+
     def handle_model_error(
         self,
         graph,
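
The two writer targets above batch events using max_events and flush_after_seconds from config.model_endpoint_monitoring.writer_graph. A rough standalone sketch of that flush policy follows; the real batching is done by the storey targets, not by this code:

    import time

    class BatchFlusher:
        """Flush a buffer when it reaches max_events or when flush_after_seconds elapse."""

        def __init__(self, max_events: int, flush_after_seconds: float):
            self.max_events = max_events
            self.flush_after_seconds = flush_after_seconds
            self.buffer: list[dict] = []
            self.last_flush = time.monotonic()

        def add(self, event: dict) -> list[dict]:
            self.buffer.append(event)
            too_many = len(self.buffer) >= self.max_events
            too_old = time.monotonic() - self.last_flush >= self.flush_after_seconds
            if too_many or too_old:
                flushed, self.buffer = self.buffer, []
                self.last_flush = time.monotonic()
                return flushed  # in the real graph these rows are written to TDEngine
            return []

    flusher = BatchFlusher(max_events=2, flush_after_seconds=10.0)
    print(flusher.add({"metric_name": "latency", "metric_value": 0.12}))  # []
    print(flusher.add({"metric_name": "latency", "metric_value": 0.15}))  # both events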
@@ -311,10 +376,7 @@
             flush_after_seconds=tsdb_batching_timeout_secs,
         )
 
-    def delete_tsdb_records(
-        self,
-        endpoint_ids: list[str],
-    ):
+    def delete_tsdb_records(self, endpoint_ids: list[str]) -> None:
         """
         To delete subtables within TDEngine, we first query the subtables names with the provided endpoint_ids.
         Then, we drop each subtable.
@@ -332,9 +394,7 @@
                 get_subtable_query = self.tables[table]._get_subtables_query_by_tag(
                     filter_tag="endpoint_id", filter_values=endpoint_ids
                 )
-                subtables_result = self.connection.run(
-                    query=get_subtable_query,
-                )
+                subtables_result = self.connection.run(query=get_subtable_query)
                 subtables.extend([subtable[0] for subtable in subtables_result.data])
             except Exception as e:
                 logger.warning(
@@ -346,15 +406,13 @@
                 )
 
             # Prepare the drop statements
-            drop_statements = []
-            for subtable in subtables:
-                drop_statements.append(
-                    self.tables[table].drop_subtable_query(subtable=subtable)
-                )
+            drop_statements = [
+                self.tables[table].drop_subtable_query(subtable=subtable)
+                for subtable in subtables
+            ]
             try:
-                self.connection.run(
-                    statements=drop_statements,
-                )
+                logger.debug("Dropping subtables", drop_statements=drop_statements)
+                self.connection.run(statements=drop_statements)
             except Exception as e:
                 logger.warning(
                     "Failed to delete model endpoint resources. You may need to delete them manually. "
@@ -369,6 +427,48 @@
             number_of_endpoints_to_delete=len(endpoint_ids),
         )
 
+    def delete_application_records(
+        self, application_name: str, endpoint_ids: Optional[list[str]] = None
+    ) -> None:
+        """
+        Delete application records from the TSDB for the given model endpoints or all if ``endpoint_ids`` is ``None``.
+        """
+        logger.debug(
+            "Deleting application records",
+            project=self.project,
+            application_name=application_name,
+            endpoint_ids=endpoint_ids,
+        )
+        tables = [
+            self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS],
+            self.tables[mm_schemas.TDEngineSuperTables.METRICS],
+        ]
+
+        filter_query = self._generate_filter_query(
+            filter_column=mm_schemas.ApplicationEvent.APPLICATION_NAME,
+            filter_values=application_name,
+        )
+        if endpoint_ids:
+            endpoint_ids_filter = self._generate_filter_query(
+                filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
+                filter_values=endpoint_ids,
+            )
+            filter_query += f" AND {endpoint_ids_filter}"
+
+        drop_statements: list[str] = []
+        for table in tables:
+            get_subtable_query = table._get_tables_query_by_condition(filter_query)
+            subtables_result = self.connection.run(query=get_subtable_query)
+            drop_statements.extend(
+                [
+                    table.drop_subtable_query(subtable=subtable[0])
+                    for subtable in subtables_result.data
+                ]
+            )
+
+        logger.debug("Dropping application records", drop_statements=drop_statements)
+        self.connection.run(statements=drop_statements)
+
     def delete_tsdb_resources(self):
         """
         Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
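
Traced end to end with made-up names, delete_application_records composes a condition, looks up the matching subtables, and drops them; a plain-Python sketch of that flow:

    # Hypothetical names throughout; this only traces the delete flow.
    def filter_eq(column: str, value: str) -> str:
        return f"{column}='{value}'"

    def filter_in(column: str, values: list[str]) -> str:
        return f"{column} IN ({', '.join(repr(v) for v in values)}) "

    condition = filter_eq("application_name", "my-app")
    condition += f" AND {filter_in('endpoint_id', ['ep1', 'ep2'])}"

    # 1. find the subtables holding this application's records
    lookup = f"SELECT DISTINCT TBNAME FROM db.app_results WHERE {condition};"
    # 2. drop each returned subtable (subtable names below are invented)
    drops = [
        f"DROP TABLE if EXISTS db.`{name}`;"
        for name in ("ep1_my_app_drift", "ep2_my_app_drift")
    ]
    print(lookup)
    print(drops)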
@@ -688,7 +788,9 @@
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+    ) -> Union[pd.DataFrame, dict[str, float]]:
+        if not endpoint_ids:
+            return {}
         filter_query = self._generate_filter_query(
             filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
             filter_values=endpoint_ids,
@@ -823,7 +925,7 @@
         # Convert DataFrame to a dictionary
         return {
             (
-                row[mm_schemas.WriterEvent.APPLICATION_NAME],
+                row[mm_schemas.WriterEvent.APPLICATION_NAME].lower(),
                 row[mm_schemas.ResultData.RESULT_STATUS],
             ): row["count(result_value)"]
             for _, row in df.iterrows()
@@ -908,26 +1010,34 @@
                 mm_schemas.WriterEvent.END_INFER_TIME,
                 mm_schemas.WriterEvent.APPLICATION_NAME,
             ]
+            agg_columns = [mm_schemas.WriterEvent.END_INFER_TIME]
+            group_by_columns = [mm_schemas.WriterEvent.APPLICATION_NAME]
             if record_type == "results":
                 table = self.tables[
                     mm_schemas.TDEngineSuperTables.APP_RESULTS
                 ].super_table
                 columns += [
                     mm_schemas.ResultData.RESULT_NAME,
+                    mm_schemas.ResultData.RESULT_KIND,
+                    mm_schemas.ResultData.RESULT_STATUS,
+                    mm_schemas.ResultData.RESULT_VALUE,
+                ]
+                agg_columns += [
                     mm_schemas.ResultData.RESULT_VALUE,
                     mm_schemas.ResultData.RESULT_STATUS,
                     mm_schemas.ResultData.RESULT_KIND,
                 ]
-                agg_column = mm_schemas.ResultData.RESULT_VALUE
+                group_by_columns += [mm_schemas.ResultData.RESULT_NAME]
             else:
                 table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
                 columns += [
                     mm_schemas.MetricData.METRIC_NAME,
                     mm_schemas.MetricData.METRIC_VALUE,
                 ]
-                agg_column = mm_schemas.MetricData.METRIC_VALUE
+                agg_columns += [mm_schemas.MetricData.METRIC_VALUE]
+                group_by_columns += [mm_schemas.MetricData.METRIC_NAME]
 
-            return self._get_records(
+            df = self._get_records(
                 table=table,
                 start=start,
                 end=end,
@@ -935,10 +1045,17 @@
                 filter_query=filter_query,
                 timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
                 # Aggregate per application/metric pair regardless of timestamp
-                group_by=columns[1:],
-                preform_agg_columns=[agg_column],
+                group_by=group_by_columns,
+                preform_agg_columns=agg_columns,
                 agg_funcs=["last"],
             )
+            if not df.empty:
+                for column in agg_columns:
+                    df.rename(
+                        columns={f"last({column})": column},
+                        inplace=True,
+                    )
+            return df
 
         df_results = get_latest_metrics_records(record_type="results")
         df_metrics = get_latest_metrics_records(record_type="metrics")
@@ -955,19 +1072,14 @@
             ]
         ):
             metric_objects = []
-
             if not df_results.empty:
-                df_results.rename(
-                    columns={
-                        f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
-                    },
-                    inplace=True,
-                )
                 for _, row in df_results.iterrows():
                     metric_objects.append(
                         mm_schemas.ApplicationResultRecord(
                             time=datetime.fromisoformat(
-                                row[mm_schemas.WriterEvent.END_INFER_TIME]
+                                row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                    " +", "+"
+                                )
                             ),
                             result_name=row[mm_schemas.ResultData.RESULT_NAME],
                             kind=row[mm_schemas.ResultData.RESULT_KIND],
@@ -977,17 +1089,13 @@
                     )
 
             if not df_metrics.empty:
-                df_metrics.rename(
-                    columns={
-                        f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
-                    },
-                    inplace=True,
-                )
                 for _, row in df_metrics.iterrows():
                     metric_objects.append(
                         mm_schemas.ApplicationMetricRecord(
                             time=datetime.fromisoformat(
-                                row[mm_schemas.WriterEvent.END_INFER_TIME]
+                                row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                    " +", "+"
+                                )
                             ),
                             metric_name=row[mm_schemas.MetricData.METRIC_NAME],
                             value=row[mm_schemas.MetricData.METRIC_VALUE],
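
The .replace(" +", "+") normalization matters because datetime.fromisoformat rejects a space before the UTC offset. A small demonstration with a made-up timestamp string in that assumed shape:

    from datetime import datetime

    # Assumed shape of the timestamp string coming back from the TSDB query:
    # note the space before the offset, which fromisoformat does not accept.
    raw = "2025-01-01 12:00:00.000 +00:00"
    parsed = datetime.fromisoformat(raw.replace(" +", "+"))
    print(parsed.isoformat())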
@@ -1146,11 +1254,9 @@
         df.dropna(inplace=True)
         return df
 
-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1158,8 +1264,6 @@
         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
 
         :return: A list of `ModelEndpointMonitoringMetric` objects.

mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py (new file)

@@ -0,0 +1,51 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime
+
+import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.feature_store.steps
+from mlrun.utils import logger
+
+
+class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs):
+        """
+        Process the data before writing to TDEngine. This step create the table name.
+
+        :returns: Event as a dictionary which will be written into the TDEngine Metrics/Results tables.
+        """
+        super().__init__(**kwargs)
+
+    def do(self, event):
+        logger.info("Process event before writing to TDEngine", event=event)
+        kind = event.get("kind")
+        table_name = (
+            f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
+            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
+        )
+        if kind == mm_schemas.WriterEventKind.RESULT:
+            # Write a new result
+            event[mm_schemas.EventFieldType.TABLE_COLUMN] = (
+                f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
+            ).replace("-", "_")
+        elif kind == mm_schemas.WriterEventKind.METRIC:
+            # Write a new metric
+            event[mm_schemas.EventFieldType.TABLE_COLUMN] = (
+                f"{table_name}_{event[mm_schemas.MetricData.METRIC_NAME]}"
+            ).replace("-", "_")
+        event[mm_schemas.WriterEvent.START_INFER_TIME] = datetime.fromisoformat(
+            event[mm_schemas.WriterEvent.START_INFER_TIME]
+        )
+        return event
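
A standalone sketch of the table-name convention the new step applies (field names here are plain strings; the real step uses the mm_schemas constants):

    from datetime import datetime

    def process_event(event: dict) -> dict:
        # Subtable column: <endpoint_id>_<application_name>_<result or metric name>, dashes to underscores.
        table_name = f"{event['endpoint_id']}_{event['application_name']}"
        name_key = "result_name" if event["kind"] == "result" else "metric_name"
        event["table_column"] = f"{table_name}_{event[name_key]}".replace("-", "_")
        event["start_infer_time"] = datetime.fromisoformat(event["start_infer_time"])
        return event

    print(process_event({
        "kind": "result",
        "endpoint_id": "ep-1",
        "application_name": "my-app",
        "result_name": "data-drift",
        "start_infer_time": "2025-01-01T00:00:00+00:00",
    }))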