mlrun 1.7.0rc37__py3-none-any.whl → 1.7.0rc38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

mlrun/alerts/alert.py CHANGED
@@ -29,6 +29,7 @@ class AlertConfig(ModelObj):
  "reset_policy",
  "state",
  "count",
+ "created",
  ]
  _fields_to_serialize = ModelObj._fields_to_serialize + [
  "entities",
@@ -55,12 +56,12 @@ class AlertConfig(ModelObj):
  created: str = None,
  count: int = None,
  ):
- """
- Alert config object
+ """Alert config object

  Example::
+
  # create an alert on endpoint_id, which will be triggered to slack if there is a "data_drift_detected" event
- 3 times in the next hour.
+ # 3 times in the next hour.
  from mlrun.alerts import AlertConfig
  import mlrun.common.schemas.alert as alert_objects

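
As a point of reference, here is a hedged sketch of what a completed version of that docstring example could look like. The alert schema objects and fields used below (AlertSeverity, EventEntities, AlertTrigger, AlertCriteria, AlertNotification) are assumptions about mlrun.common.schemas.alert and are not part of this diff:

    # Hedged sketch only; names and values are placeholders/assumptions.
    import mlrun
    from mlrun.alerts import AlertConfig
    import mlrun.common.schemas.alert as alert_objects

    endpoint_id = "some-endpoint-id"  # placeholder
    notification = mlrun.model.Notification(
        kind="slack",
        name="slack_drift",
        secret_params={"webhook": "https://hooks.slack.com/services/..."},
    ).to_dict()

    alert_config = AlertConfig(
        project="my-project",
        name="drift-alert",
        summary="data drift detected on a model endpoint",
        severity=alert_objects.AlertSeverity.HIGH,
        entities=alert_objects.EventEntities(
            kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
            project="my-project",
            ids=[endpoint_id],
        ),
        trigger=alert_objects.AlertTrigger(
            events=[alert_objects.EventKind.DATA_DRIFT_DETECTED]
        ),
        criteria=alert_objects.AlertCriteria(count=3, period="1h"),
        notifications=[alert_objects.AlertNotification(notification=notification)],
    )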
@@ -53,9 +53,11 @@ class EventFieldType:
  PREDICTIONS = "predictions"
  NAMED_PREDICTIONS = "named_predictions"
  ERROR_COUNT = "error_count"
+ MODEL_ERROR = "model_error"
  ENTITIES = "entities"
  FIRST_REQUEST = "first_request"
  LAST_REQUEST = "last_request"
+ LAST_REQUEST_TIMESTAMP = "last_request_timestamp"
  METRIC = "metric"
  METRICS = "metrics"
  BATCH_INTERVALS_DICT = "batch_intervals_dict"
@@ -217,6 +219,7 @@ class FileTargetKind:
  APP_METRICS = "app_metrics"
  MONITORING_SCHEDULES = "monitoring_schedules"
  MONITORING_APPLICATION = "monitoring_application"
+ ERRORS = "errors"


  class ModelMonitoringMode(str, Enum):
@@ -240,6 +243,7 @@ class V3IOTSDBTables(MonitoringStrEnum):
  APP_RESULTS = "app-results"
  METRICS = "metrics"
  EVENTS = "events"
+ ERRORS = "errors"


  class TDEngineSuperTables(MonitoringStrEnum):
@@ -71,9 +71,9 @@ class Notification(pydantic.BaseModel):

  kind: NotificationKind
  name: str
- message: str
- severity: NotificationSeverity
- when: list[str]
+ message: typing.Optional[str] = None
+ severity: typing.Optional[NotificationSeverity] = None
+ when: typing.Optional[list[str]] = None
  condition: typing.Optional[str] = None
  params: typing.Optional[dict[str, typing.Any]] = None
  status: typing.Optional[NotificationStatus] = None
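
Since message, severity, and when are now optional, a notification record can be validated from partial data. A minimal sketch, assuming the model is importable from mlrun.common.schemas.notification and that "slack" is a valid NotificationKind value:

    from mlrun.common.schemas.notification import Notification

    # Only kind and name are required after this change.
    n = Notification(kind="slack", name="on-failure")
    assert n.message is None and n.severity is None and n.when is None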
@@ -16,6 +16,7 @@ import time
  from pathlib import Path
  from urllib.parse import urlparse

+ from azure.storage.blob import BlobServiceClient
  from azure.storage.blob._shared.base_client import parse_connection_str
  from fsspec.registry import get_filesystem_class

@@ -29,47 +30,128 @@ from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer

  class AzureBlobStore(DataStore):
  using_bucket = True
+ max_concurrency = 100
+ max_blocksize = 1024 * 1024 * 4
+ max_single_put_size = (
+ 1024 * 1024 * 8
+ ) # for service_client property only, does not affect filesystem

  def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
  super().__init__(parent, name, schema, endpoint, secrets=secrets)
+ self._service_client = None
+ self._storage_options = None
+
+ @property
+ def storage_options(self):
+ if not self._storage_options:
+ res = dict(
+ account_name=self._get_secret_or_env("account_name")
+ or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_NAME"),
+ account_key=self._get_secret_or_env("account_key")
+ or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_KEY"),
+ connection_string=self._get_secret_or_env("connection_string")
+ or self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING"),
+ tenant_id=self._get_secret_or_env("tenant_id")
+ or self._get_secret_or_env("AZURE_STORAGE_TENANT_ID"),
+ client_id=self._get_secret_or_env("client_id")
+ or self._get_secret_or_env("AZURE_STORAGE_CLIENT_ID"),
+ client_secret=self._get_secret_or_env("client_secret")
+ or self._get_secret_or_env("AZURE_STORAGE_CLIENT_SECRET"),
+ sas_token=self._get_secret_or_env("sas_token")
+ or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
+ credential=self._get_secret_or_env("credential"),
+ )
+ self._storage_options = self._sanitize_storage_options(res)
+ return self._storage_options

  @property
  def filesystem(self):
  """return fsspec file system object, if supported"""
- if self._filesystem:
- return self._filesystem
  try:
  import adlfs # noqa
  except ImportError as exc:
  raise ImportError("Azure adlfs not installed") from exc
- # in order to support az and wasbs kinds.
- filesystem_class = get_filesystem_class(protocol=self.kind)
- self._filesystem = makeDatastoreSchemaSanitizer(
- filesystem_class,
- using_bucket=self.using_bucket,
- **self.get_storage_options(),
- )
+
+ if not self._filesystem:
+ # in order to support az and wasbs kinds
+ filesystem_class = get_filesystem_class(protocol=self.kind)
+ self._filesystem = makeDatastoreSchemaSanitizer(
+ filesystem_class,
+ using_bucket=self.using_bucket,
+ blocksize=self.max_blocksize,
+ **self.storage_options,
+ )
  return self._filesystem

- def get_storage_options(self):
- res = dict(
- account_name=self._get_secret_or_env("account_name")
- or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_NAME"),
- account_key=self._get_secret_or_env("account_key")
- or self._get_secret_or_env("AZURE_STORAGE_KEY"),
- connection_string=self._get_secret_or_env("connection_string")
- or self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING"),
- tenant_id=self._get_secret_or_env("tenant_id")
- or self._get_secret_or_env("AZURE_STORAGE_TENANT_ID"),
- client_id=self._get_secret_or_env("client_id")
- or self._get_secret_or_env("AZURE_STORAGE_CLIENT_ID"),
- client_secret=self._get_secret_or_env("client_secret")
- or self._get_secret_or_env("AZURE_STORAGE_CLIENT_SECRET"),
- sas_token=self._get_secret_or_env("sas_token")
- or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
- credential=self._get_secret_or_env("credential"),
- )
- return self._sanitize_storage_options(res)
+ @property
+ def service_client(self):
+ try:
+ import azure # noqa
+ except ImportError as exc:
+ raise ImportError("Azure not installed") from exc
+
+ if not self._service_client:
+ self._do_connect()
+ return self._service_client
+
+ def _do_connect(self):
+ """
+
+ Creates a client for azure.
+ Raises MLRunInvalidArgumentError if none of the connection details are available
+ based on do_connect in AzureBlobFileSystem:
+ https://github.com/fsspec/adlfs/blob/2023.9.0/adlfs/spec.py#L422
+ """
+ from azure.identity import ClientSecretCredential
+
+ storage_options = self.storage_options
+ connection_string = storage_options.get("connection_string")
+ client_name = storage_options.get("account_name")
+ account_key = storage_options.get("account_key")
+ sas_token = storage_options.get("sas_token")
+ client_id = storage_options.get("client_id")
+ credential = storage_options.get("credential")
+
+ credential_from_client_id = None
+ if (
+ credential is None
+ and account_key is None
+ and sas_token is None
+ and client_id is not None
+ ):
+ credential_from_client_id = ClientSecretCredential(
+ tenant_id=storage_options.get("tenant_id"),
+ client_id=client_id,
+ client_secret=storage_options.get("client_secret"),
+ )
+ try:
+ if connection_string is not None:
+ self._service_client = BlobServiceClient.from_connection_string(
+ conn_str=connection_string,
+ max_block_size=self.max_blocksize,
+ max_single_put_size=self.max_single_put_size,
+ )
+ elif client_name is not None:
+ account_url = f"https://{client_name}.blob.core.windows.net"
+ cred = credential_from_client_id or credential or account_key
+ if not cred and sas_token is not None:
+ if not sas_token.startswith("?"):
+ sas_token = f"?{sas_token}"
+ account_url = account_url + sas_token
+ self._service_client = BlobServiceClient(
+ account_url=account_url,
+ credential=cred,
+ max_block_size=self.max_blocksize,
+ max_single_put_size=self.max_single_put_size,
+ )
+ else:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ "Must provide either a connection_string or account_name with credentials"
+ )
+ except Exception as e:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ f"unable to connect to account for {e}"
+ )

  def _convert_key_to_remote_path(self, key):
  key = key.strip("/")
@@ -82,7 +164,15 @@ class AzureBlobStore(DataStore):

  def upload(self, key, src_path):
  remote_path = self._convert_key_to_remote_path(key)
- self.filesystem.put_file(src_path, remote_path, overwrite=True)
+ container, remote_path = remote_path.split("/", 1)
+ container_client = self.service_client.get_container_client(container=container)
+ with open(file=src_path, mode="rb") as data:
+ container_client.upload_blob(
+ name=remote_path,
+ data=data,
+ overwrite=True,
+ max_concurrency=self.max_concurrency,
+ )

  def get(self, key, size=None, offset=0):
  remote_path = self._convert_key_to_remote_path(key)
@@ -135,7 +225,7 @@ class AzureBlobStore(DataStore):

  def get_spark_options(self):
  res = {}
- st = self.get_storage_options()
+ st = self.storage_options()
  service = "blob"
  primary_url = None
  if st.get("connection_string"):
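
A hedged usage sketch of the credentials resolved by the new storage_options property; note that the account-key environment variable it reads is AZURE_STORAGE_ACCOUNT_KEY (the removed get_storage_options read AZURE_STORAGE_KEY). Account, container, and paths below are placeholders:

    import os
    import mlrun

    # Any of the credential combinations handled by storage_options/_do_connect
    # (connection string, account key, SAS token, or tenant/client id plus secret) works.
    os.environ["AZURE_STORAGE_ACCOUNT_NAME"] = "myaccount"
    os.environ["AZURE_STORAGE_ACCOUNT_KEY"] = "<account-key>"

    item = mlrun.get_dataitem("az://mycontainer/path/data.csv")
    df = item.as_df()          # read goes through the adlfs filesystem (4 MiB blocksize)
    item.upload("local.csv")   # upload now goes through the BlobServiceClient path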
@@ -37,17 +37,12 @@ def parse_feature_string(feature):
  raise mlrun.errors.MLRunInvalidArgumentError(
  f"feature {feature} must be {expected_message}"
  )
- splitted = feature.split(feature_separator)
- if len(splitted) > 2:
- raise mlrun.errors.MLRunInvalidArgumentError(
- f"feature {feature} must be {expected_message}, cannot have more than one '.'"
- )
- feature_set = splitted[0]
- feature_name = splitted[1]
- splitted = feature_name.split(" as ")
- if len(splitted) > 1:
- return feature_set.strip(), splitted[0].strip(), splitted[1].strip()
- return feature_set.strip(), feature_name.strip(), None
+ feature_set, feature_name = feature.rsplit(feature_separator, 1)
+ feature_set = feature_set.strip()
+ split_result = feature_name.split(" as ", 1)
+ feature_name = split_result[0].strip()
+ alias = split_result[1].strip() if len(split_result) > 1 else None
+ return feature_set, feature_name, alias


  def parse_project_name_from_feature_string(feature):
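
The rewrite keeps the same (feature_set, feature_name, alias) return shape but splits with rsplit, so extra separators no longer raise; they stay on the feature-set side. An illustrative sketch, assuming feature_separator is "." as implied by the removed error message:

    parse_feature_string("transactions.amount_sum as total")
    # -> ("transactions", "amount_sum", "total")

    parse_feature_string("transactions.amount_sum")
    # -> ("transactions", "amount_sum", None)

    # rsplit keeps additional dots in the feature-set part instead of raising:
    parse_feature_string("proj.transactions.amount_sum")
    # -> ("proj.transactions", "amount_sum", None)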
mlrun/model.py CHANGED
@@ -1789,6 +1789,11 @@ class RunObject(RunTemplate):

  return state

+ def abort(self):
+ """abort the run"""
+ db = mlrun.get_run_db()
+ db.abort_run(self.metadata.uid, self.metadata.project)
+
  @staticmethod
  def create_uri(project: str, uid: str, iteration: Union[int, str], tag: str = ""):
  if tag:
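
A brief usage sketch of the new helper; `project` and "trainer" below are placeholders for an existing mlrun project handle and a registered function:

    # Previously this required calling mlrun.get_run_db().abort_run(uid, project) directly.
    run = project.run_function("trainer", watch=False)
    run.abort()  # asks the MLRun API to abort this run by uid and project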
@@ -11,7 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
+ import http
  import json
  import typing
  from dataclasses import dataclass
@@ -417,11 +417,14 @@ class KVStoreBase(StoreBase):
  )
  return response.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
  except v3io.dataplane.response.HttpResponseError as err:
- logger.debug("Error while getting last analyzed time", err=err)
- raise mlrun.errors.MLRunNotFoundError(
- f"No last analyzed value has been found for {application_name} "
- f"that processes model endpoint {endpoint_id}",
- )
+ if err.status_code == http.HTTPStatus.NOT_FOUND:
+ logger.debug("Last analyzed time not found", err=err)
+ raise mlrun.errors.MLRunNotFoundError(
+ f"No last analyzed value has been found for {application_name} "
+ f"that processes model endpoint {endpoint_id}",
+ )
+ logger.error("Error while getting last analyzed time", err=err)
+ raise err

  def update_last_analyzed(
  self, endpoint_id: str, application_name: str, last_analyzed: int
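
The caller-side effect, as a hedged sketch: only an HTTP 404 from v3io is mapped to MLRunNotFoundError, while any other HttpResponseError is logged and re-raised. The get_last_analyzed signature below is assumed from the neighboring update_last_analyzed:

    import mlrun.errors

    # Sketch only; `kv_store` stands for a KVStoreBase instance.
    try:
        last_analyzed = kv_store.get_last_analyzed(
            endpoint_id="ep-1", application_name="my-app"
        )
    except mlrun.errors.MLRunNotFoundError:
        last_analyzed = None  # nothing recorded yet for this app/endpoint
    # other v3io failures (permissions, connectivity) now propagate unchanged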
@@ -15,6 +15,7 @@
  import typing
  from abc import ABC, abstractmethod
  from datetime import datetime
+ from typing import Union

  import pandas as pd
  import pydantic
@@ -47,7 +48,7 @@ class TSDBConnector(ABC):
  self.project = project

  @abstractmethod
- def apply_monitoring_stream_steps(self, graph):
+ def apply_monitoring_stream_steps(self, graph) -> None:
  """
  Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
  different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -59,6 +60,14 @@ class TSDBConnector(ABC):
  """
  pass

+ @abstractmethod
+ def handle_model_error(self, graph, **kwargs) -> None:
+ """
+ Adds a branch to the stream pod graph to handle events that
+ arrive with errors from the model server and saves them to the error TSDB table.
+ The first step that generates by this method should come after `ForwardError` step.
+ """
+
  @abstractmethod
  def write_application_event(
  self,
@@ -181,6 +190,117 @@ class TSDBConnector(ABC):
  :return: Metric values object or no data object.
  """

+ @abstractmethod
+ def get_last_request(
+ self,
+ endpoint_ids: Union[str, list[str]],
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ """
+ Fetches data from the predictions TSDB table and returns the most recent request
+ timestamp for each specified endpoint.
+
+ :param endpoint_ids: A list of model endpoint identifiers.
+ :param start: The start time for the query.
+ :param end: The end time for the query.
+
+ :return: A pd.DataFrame containing the columns [endpoint_id, last_request, last_latency].
+ If an endpoint has not been invoked within the specified time range, it will not appear in the result.
+ """
+
+ @abstractmethod
+ def get_drift_status(
+ self,
+ endpoint_ids: Union[str, list[str]],
+ start: Union[datetime, str] = "now-24h",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ """
+ Fetches data from the app-results TSDB table and returns the highest status among all
+ the result in the provided time range, which by default is the last 24 hours, for each specified endpoint.
+
+ :param endpoint_ids: A list of model endpoint identifiers.
+ :param start: The start time for the query.
+ :param end: The end time for the query.
+
+ :return: A pd.DataFrame containing the columns [result_status, endpoint_id].
+ If an endpoint has not been monitored within the specified time range (last 24 hours),
+ it will not appear in the result.
+ """
+
+ @abstractmethod
+ def get_metrics_metadata(
+ self,
+ endpoint_id: str,
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ """
+ Fetches distinct metrics metadata from the metrics TSDB table for a specified model endpoint.
+
+ :param endpoint_id: The model endpoint identifier.
+ :param start: The start time of the query.
+ :param end: The end time of the query.
+
+ :return: A pd.DataFrame containing all distinct metrics for the specified endpoint within the given time range.
+ Containing the columns [application_name, metric_name, endpoint_id]
+ """
+
+ @abstractmethod
+ def get_results_metadata(
+ self,
+ endpoint_id: str,
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ """
+ Fetches distinct results metadata from the app-results TSDB table for a specified model endpoint.
+
+ :param endpoint_id: The model endpoint identifier.
+ :param start: The start time of the query.
+ :param end: The end time of the query.
+
+ :return: A pd.DataFrame containing all distinct results for the specified endpoint within the given time range.
+ Containing the columns [application_name, result_name, result_kind, endpoint_id]
+ """
+
+ @abstractmethod
+ def get_error_count(
+ self,
+ endpoint_ids: Union[str, list[str]],
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ """
+ Fetches data from the error TSDB table and returns the error count for each specified endpoint.
+
+ :param endpoint_ids: A list of model endpoint identifiers.
+ :param start: The start time for the query.
+ :param end: The end time for the query.
+
+ :return: A pd.DataFrame containing the columns [error_count, endpoint_id].
+ If an endpoint have not raised error within the specified time range, it will not appear in the result.
+ """
+
+ @abstractmethod
+ def get_avg_latency(
+ self,
+ endpoint_ids: Union[str, list[str]],
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ """
+ Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint
+
+ :param endpoint_ids: A list of model endpoint identifiers.
+ :param start: The start time for the query.
+ :param end: The end time for the query.
+
+ :return: A pd.DataFrame containing the columns [avg_latency, endpoint_id].
+ If an endpoint has not been invoked within the specified time range, it will not appear in the result.
+ """
+
  @staticmethod
  def df_to_metrics_values(
  *,
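
Taken together, the new abstract methods define a small aggregate-query API over the monitoring TSDB tables. A hedged sketch of how a concrete connector instance might be queried (the connector object and endpoint ids are placeholders):

    # Sketch only; `connector` is a concrete TSDBConnector implementation for the project.
    endpoint_ids = ["ep-1", "ep-2"]

    last_requests = connector.get_last_request(endpoint_ids)  # [endpoint_id, last_request, last_latency]
    drift = connector.get_drift_status(endpoint_ids)          # [result_status, endpoint_id], last 24h by default
    errors = connector.get_error_count(endpoint_ids)          # [error_count, endpoint_id]
    latency = connector.get_avg_latency(endpoint_ids)         # [avg_latency, endpoint_id]

    metrics_meta = connector.get_metrics_metadata("ep-1")     # [application_name, metric_name, endpoint_id]
    results_meta = connector.get_results_metadata("ep-1")     # [application_name, result_name, result_kind, endpoint_id]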
@@ -14,6 +14,7 @@

  import typing
  from datetime import datetime
+ from typing import Union

  import pandas as pd
  import taosws
@@ -156,6 +157,9 @@ class TDEngineConnector(TSDBConnector):
  after="ProcessBeforeTDEngine",
  )

+ def handle_model_error(self, graph, **kwargs) -> None:
+ pass
+
  def delete_tsdb_resources(self):
  """
  Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
@@ -246,11 +250,9 @@ class TDEngineConnector(TSDBConnector):
  raise mlrun.errors.MLRunInvalidArgumentError(
  f"Failed to query table {table} in database {self.database}, {str(e)}"
  )
- columns = []
- for column in query_result.fields:
- columns.append(column.name())

- return pd.DataFrame(query_result, columns=columns)
+ df_columns = [field.name() for field in query_result.fields]
+ return pd.DataFrame(query_result, columns=df_columns)

  def read_metrics_data(
  self,
@@ -274,13 +276,22 @@ class TDEngineConnector(TSDBConnector):
  ],
  ],
  ]:
+ timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
+ columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
  if type == "metrics":
  table = mm_schemas.TDEngineSuperTables.METRICS
  name = mm_schemas.MetricData.METRIC_NAME
+ columns += [name, mm_schemas.MetricData.METRIC_VALUE]
  df_handler = self.df_to_metrics_values
  elif type == "results":
  table = mm_schemas.TDEngineSuperTables.APP_RESULTS
  name = mm_schemas.ResultData.RESULT_NAME
+ columns += [
+ name,
+ mm_schemas.ResultData.RESULT_VALUE,
+ mm_schemas.ResultData.RESULT_STATUS,
+ mm_schemas.ResultData.RESULT_KIND,
+ ]
  df_handler = self.df_to_results_values
  else:
  raise mlrun.errors.MLRunInvalidArgumentError(
@@ -300,7 +311,8 @@ class TDEngineConnector(TSDBConnector):
  start=start,
  end=end,
  filter_query=filter_query,
- timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+ timestamp_column=timestamp_column,
+ columns=columns,
  )

  df[mm_schemas.WriterEvent.END_INFER_TIME] = pd.to_datetime(
@@ -377,6 +389,54 @@ class TDEngineConnector(TSDBConnector):
  ), # pyright: ignore[reportArgumentType]
  )

+ def get_last_request(
+ self,
+ endpoint_ids: Union[str, list[str]],
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ pass
+
+ def get_drift_status(
+ self,
+ endpoint_ids: Union[str, list[str]],
+ start: Union[datetime, str] = "now-24h",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ pass
+
+ def get_metrics_metadata(
+ self,
+ endpoint_id: str,
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ pass
+
+ def get_results_metadata(
+ self,
+ endpoint_id: str,
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ pass
+
+ def get_error_count(
+ self,
+ endpoint_ids: Union[str, list[str]],
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ pass
+
+ def get_avg_latency(
+ self,
+ endpoint_ids: Union[str, list[str]],
+ start: Union[datetime, str] = "0",
+ end: Union[datetime, str] = "now",
+ ) -> pd.DataFrame:
+ pass
+
  # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
  #
  # def read_prediction_metric_for_endpoint_if_exists(
@@ -11,7 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
+ from datetime import datetime
  from typing import Any

  import mlrun.feature_store.steps
@@ -20,6 +20,7 @@ from mlrun.common.schemas.model_monitoring import (
  EventKeyMetrics,
  EventLiveStats,
  )
+ from mlrun.utils import logger


  def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
@@ -134,3 +135,24 @@ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
  else:
  unpacked[key] = new_event[key]
  return unpacked if unpacked else None
+
+
+ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
+ def __init__(self, **kwargs):
+ """
+ Prepare the event for insertion into the errors TSDB table.
+ """
+ super().__init__(**kwargs)
+
+ def do(self, event):
+ error = event.get("error")
+ timestamp = datetime.fromisoformat(event.get("when"))
+ endpoint_id = event[EventFieldType.ENDPOINT_ID]
+ event = {
+ EventFieldType.MODEL_ERROR: str(error),
+ EventFieldType.ENDPOINT_ID: endpoint_id,
+ EventFieldType.TIMESTAMP: timestamp,
+ EventFieldType.ERROR_COUNT: 1.0,
+ }
+ logger.info("Write error to errors TSDB table", event=event)
+ return event
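
A small hedged sketch of what this step consumes and emits; the incoming keys ("error", "when", endpoint_id) follow the do() method above, while the concrete values are synthetic:

    # Sketch only; illustrates ErrorExtractor.do with a synthetic event.
    step = ErrorExtractor()
    incoming = {
        "error": ValueError("model failed to predict"),
        "when": "2024-01-01T12:00:00+00:00",
        EventFieldType.ENDPOINT_ID: "ep-1",
    }
    out = step.do(incoming)
    # out maps EventFieldType.MODEL_ERROR to the stringified error,
    # EventFieldType.ENDPOINT_ID to "ep-1", EventFieldType.TIMESTAMP to a datetime,
    # and EventFieldType.ERROR_COUNT to 1.0, ready for the errors TSDB table.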