mlrun 1.8.0rc4__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic; see the registry listing for more details.

Files changed (69)
  1. mlrun/__init__.py +4 -3
  2. mlrun/alerts/alert.py +129 -2
  3. mlrun/artifacts/__init__.py +1 -1
  4. mlrun/artifacts/base.py +12 -1
  5. mlrun/artifacts/document.py +59 -38
  6. mlrun/common/model_monitoring/__init__.py +0 -2
  7. mlrun/common/model_monitoring/helpers.py +0 -28
  8. mlrun/common/schemas/__init__.py +1 -4
  9. mlrun/common/schemas/alert.py +3 -0
  10. mlrun/common/schemas/artifact.py +4 -0
  11. mlrun/common/schemas/client_spec.py +0 -1
  12. mlrun/common/schemas/model_monitoring/__init__.py +0 -6
  13. mlrun/common/schemas/model_monitoring/constants.py +11 -9
  14. mlrun/common/schemas/model_monitoring/model_endpoints.py +77 -149
  15. mlrun/common/schemas/notification.py +6 -0
  16. mlrun/config.py +0 -2
  17. mlrun/datastore/datastore_profile.py +57 -17
  18. mlrun/datastore/vectorstore.py +67 -59
  19. mlrun/db/base.py +22 -18
  20. mlrun/db/factory.py +0 -3
  21. mlrun/db/httpdb.py +122 -150
  22. mlrun/db/nopdb.py +33 -17
  23. mlrun/execution.py +43 -29
  24. mlrun/model.py +7 -0
  25. mlrun/model_monitoring/__init__.py +3 -2
  26. mlrun/model_monitoring/api.py +40 -43
  27. mlrun/model_monitoring/applications/_application_steps.py +4 -2
  28. mlrun/model_monitoring/applications/base.py +65 -6
  29. mlrun/model_monitoring/applications/context.py +64 -33
  30. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  31. mlrun/model_monitoring/applications/histogram_data_drift.py +2 -6
  32. mlrun/model_monitoring/controller.py +43 -37
  33. mlrun/model_monitoring/db/__init__.py +0 -2
  34. mlrun/model_monitoring/db/tsdb/base.py +2 -1
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +2 -1
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +43 -0
  37. mlrun/model_monitoring/helpers.py +12 -66
  38. mlrun/model_monitoring/stream_processing.py +83 -270
  39. mlrun/model_monitoring/writer.py +1 -10
  40. mlrun/projects/project.py +87 -74
  41. mlrun/runtimes/nuclio/function.py +7 -6
  42. mlrun/runtimes/nuclio/serving.py +7 -1
  43. mlrun/serving/routers.py +158 -145
  44. mlrun/serving/server.py +6 -0
  45. mlrun/serving/states.py +2 -0
  46. mlrun/serving/v2_serving.py +69 -60
  47. mlrun/utils/helpers.py +14 -30
  48. mlrun/utils/notifications/notification/mail.py +36 -9
  49. mlrun/utils/notifications/notification_pusher.py +34 -13
  50. mlrun/utils/version/version.json +2 -2
  51. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/METADATA +5 -4
  52. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/RECORD +56 -69
  53. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +0 -149
  54. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  55. mlrun/model_monitoring/db/stores/base/__init__.py +0 -15
  56. mlrun/model_monitoring/db/stores/base/store.py +0 -154
  57. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  58. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -46
  59. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -93
  60. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -47
  61. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -25
  62. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -408
  63. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  64. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -464
  65. mlrun/model_monitoring/model_endpoint.py +0 -120
  66. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/LICENSE +0 -0
  67. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/WHEEL +0 -0
  68. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/entry_points.txt +0 -0
  69. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/top_level.txt +0 -0
@@ -31,11 +31,11 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
31
31
  import mlrun.data_types.infer
32
32
  import mlrun.model_monitoring
33
33
  import mlrun.utils.helpers
34
+ from mlrun.common.schemas import ModelEndpoint
34
35
  from mlrun.common.schemas.model_monitoring.model_endpoints import (
35
36
  ModelEndpointMonitoringMetric,
36
37
  _compose_full_name,
37
38
  )
38
- from mlrun.model_monitoring.model_endpoint import ModelEndpoint
39
39
  from mlrun.utils import logger
40
40
 
41
41
 
@@ -162,24 +162,6 @@ def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataI
162
162
  )
163
163
 
164
164
 
165
- def get_connection_string(
166
- secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
167
- ) -> str:
168
- """Get endpoint store connection string from the project secret. If wasn't set, take it from the system
169
- configurations.
170
-
171
- :param secret_provider: An optional secret provider to get the connection string secret.
172
-
173
- :return: Valid SQL connection string.
174
-
175
- """
176
-
177
- return mlrun.get_secret_or_env(
178
- key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
179
- secret_provider=secret_provider,
180
- )
181
-
182
-
183
165
  def get_tsdb_connection_string(
184
166
  secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
185
167
  ) -> str:
@@ -252,19 +234,24 @@ def update_model_endpoint_last_request(
252
234
  :param current_request: current request time
253
235
  :param db: DB interface.
254
236
  """
255
- is_model_server_endpoint = model_endpoint.spec.stream_path != ""
256
- if is_model_server_endpoint:
257
- current_request = current_request.isoformat()
237
+ is_batch_endpoint = (
238
+ model_endpoint.metadata.endpoint_type == mm_constants.EndpointType.BATCH_EP
239
+ )
240
+ if not is_batch_endpoint:
258
241
  logger.info(
259
242
  "Update model endpoint last request time (EP with serving)",
260
243
  project=project,
261
244
  endpoint_id=model_endpoint.metadata.uid,
245
+ name=model_endpoint.metadata.name,
246
+ function_name=model_endpoint.spec.function_name,
262
247
  last_request=model_endpoint.status.last_request,
263
248
  current_request=current_request,
264
249
  )
265
250
  db.patch_model_endpoint(
266
251
  project=project,
267
252
  endpoint_id=model_endpoint.metadata.uid,
253
+ name=model_endpoint.metadata.name,
254
+ function_name=model_endpoint.spec.function_name,
268
255
  attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
269
256
  )
270
257
  else: # model endpoint without any serving function - close the window "manually"
@@ -283,7 +270,7 @@ def update_model_endpoint_last_request(
283
270
  + datetime.timedelta(
284
271
  seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
285
272
  )
286
- ).isoformat()
273
+ )
287
274
  logger.info(
288
275
  "Bumping model endpoint last request time (EP without serving)",
289
276
  project=project,
@@ -295,6 +282,8 @@ def update_model_endpoint_last_request(
295
282
  db.patch_model_endpoint(
296
283
  project=project,
297
284
  endpoint_id=model_endpoint.metadata.uid,
285
+ name=model_endpoint.metadata.name,
286
+ function_name=model_endpoint.spec.function_name,
298
287
  attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
299
288
  )
300
289
 
@@ -336,17 +325,6 @@ def calculate_inputs_statistics(
336
325
  return inputs_statistics
337
326
 
338
327
 
339
- def get_endpoint_record(
340
- project: str,
341
- endpoint_id: str,
342
- secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
343
- ) -> dict[str, typing.Any]:
344
- model_endpoint_store = mlrun.model_monitoring.get_store_object(
345
- project=project, secret_provider=secret_provider
346
- )
347
- return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
348
-
349
-
350
328
  def get_result_instance_fqn(
351
329
  model_endpoint_id: str, app_name: str, result_name: str
352
330
  ) -> str:
@@ -386,38 +364,6 @@ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
386
364
  )
387
365
 
388
366
 
389
- def enrich_model_endpoint_with_model_uri(
390
- model_endpoint: ModelEndpoint,
391
- model_obj: mlrun.artifacts.ModelArtifact,
392
- ):
393
- """
394
- Enrich the model endpoint object with the model uri from the model object. We will use a unique reference
395
- to the model object that includes the project, db_key, iter, and tree.
396
- In addition, we verify that the model object is of type `ModelArtifact`.
397
-
398
- :param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
399
- :param model_obj: An object representing the model artifact.
400
-
401
- :raise: `MLRunInvalidArgumentError` if the model object is not of type `ModelArtifact`.
402
- """
403
- mlrun.utils.helpers.verify_field_of_type(
404
- field_name="model_endpoint.spec.model_uri",
405
- field_value=model_obj,
406
- expected_type=mlrun.artifacts.ModelArtifact,
407
- )
408
-
409
- # Update model_uri with a unique reference to handle future changes
410
- model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
411
- project=model_endpoint.metadata.project,
412
- key=model_obj.db_key,
413
- iter=model_obj.iter,
414
- tree=model_obj.tree,
415
- )
416
- model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
417
- kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
418
- )
419
-
420
-
421
367
  def _get_monitoring_schedules_folder_path(project: str) -> str:
422
368
  return typing.cast(
423
369
  str,
@@ -14,7 +14,6 @@
14
14
 
15
15
  import collections
16
16
  import datetime
17
- import json
18
17
  import os
19
18
  import typing
20
19
 
@@ -32,13 +31,10 @@ import mlrun.utils
32
31
  from mlrun.common.schemas.model_monitoring.constants import (
33
32
  EndpointType,
34
33
  EventFieldType,
35
- EventKeyMetrics,
36
- EventLiveStats,
37
34
  FileTargetKind,
38
- ModelEndpointTarget,
39
35
  ProjectSecretKeys,
40
36
  )
41
- from mlrun.model_monitoring.db import StoreBase, TSDBConnector
37
+ from mlrun.model_monitoring.db import TSDBConnector
42
38
  from mlrun.utils import logger
43
39
 
44
40
 
@@ -102,18 +98,6 @@ class EventStreamProcessor:
102
98
  v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
103
99
  )
104
100
 
105
- # KV path
106
- kv_path = mlrun.mlconf.get_model_monitoring_file_target_path(
107
- project=self.project, kind=FileTargetKind.ENDPOINTS
108
- )
109
- (
110
- _,
111
- self.kv_container,
112
- self.kv_path,
113
- ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
114
- kv_path
115
- )
116
-
117
101
  # TSDB path and configurations
118
102
  tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
119
103
  project=self.project, kind=FileTargetKind.EVENTS
@@ -134,7 +118,6 @@ class EventStreamProcessor:
134
118
  self,
135
119
  fn: mlrun.runtimes.ServingRuntime,
136
120
  tsdb_connector: TSDBConnector,
137
- endpoint_store: StoreBase,
138
121
  ) -> None:
139
122
  """
140
123
  Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -163,31 +146,23 @@ class EventStreamProcessor:
163
146
 
164
147
  :param fn: A serving function.
165
148
  :param tsdb_connector: Time series database connector.
166
- :param endpoint_store: KV/SQL store used for endpoint data.
167
149
  """
168
150
 
169
151
  graph = typing.cast(
170
152
  mlrun.serving.states.RootFlowStep,
171
153
  fn.set_topology(mlrun.serving.states.StepKinds.flow),
172
154
  )
173
- graph.add_step(
174
- "ExtractEndpointID",
175
- "extract_endpoint",
176
- full_event=True,
177
- )
178
155
 
179
156
  # split the graph between event with error vs valid event
180
157
  graph.add_step(
181
158
  "storey.Filter",
182
159
  "FilterError",
183
- after="extract_endpoint",
184
160
  _fn="(event.get('error') is None)",
185
161
  )
186
162
 
187
163
  graph.add_step(
188
164
  "storey.Filter",
189
165
  "ForwardError",
190
- after="extract_endpoint",
191
166
  _fn="(event.get('error') is not None)",
192
167
  )
193
168
 
@@ -199,7 +174,7 @@ class EventStreamProcessor:
199
174
  def apply_process_endpoint_event():
200
175
  graph.add_step(
201
176
  "ProcessEndpointEvent",
202
- after="extract_endpoint", # TODO: change this to FilterError in ML-7456
177
+ after="FilterError",
203
178
  full_event=True,
204
179
  project=self.project,
205
180
  )
@@ -234,79 +209,11 @@ class EventStreamProcessor:
234
209
  )
235
210
 
236
211
  apply_map_feature_names()
237
-
238
- # Calculate number of predictions and average latency
239
- def apply_storey_aggregations():
240
- # Calculate number of predictions for each window (5 min and 1 hour by default)
241
- graph.add_step(
242
- class_name="storey.AggregateByKey",
243
- aggregates=[
244
- {
245
- "name": EventFieldType.LATENCY,
246
- "column": EventFieldType.LATENCY,
247
- "operations": ["count", "avg"],
248
- "windows": self.aggregate_windows,
249
- "period": self.aggregate_period,
250
- }
251
- ],
252
- name=EventFieldType.LATENCY,
253
- after="MapFeatureNames",
254
- step_name="Aggregates",
255
- table=".",
256
- key_field=EventFieldType.ENDPOINT_ID,
257
- )
258
- # Calculate average latency time for each window (5 min and 1 hour by default)
259
- graph.add_step(
260
- class_name="storey.Rename",
261
- mapping={
262
- "latency_count_5m": EventLiveStats.PREDICTIONS_COUNT_5M,
263
- "latency_count_1h": EventLiveStats.PREDICTIONS_COUNT_1H,
264
- },
265
- name="Rename",
266
- after=EventFieldType.LATENCY,
267
- )
268
-
269
- apply_storey_aggregations()
270
-
271
- # KV/SQL branch
272
- # Filter relevant keys from the event before writing the data into the database table
273
- def apply_process_before_endpoint_update():
274
- graph.add_step(
275
- "ProcessBeforeEndpointUpdate",
276
- name="ProcessBeforeEndpointUpdate",
277
- after="Rename",
278
- )
279
-
280
- apply_process_before_endpoint_update()
281
-
282
- # Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
283
- # about average latency and the amount of predictions over time
284
- def apply_update_endpoint():
285
- graph.add_step(
286
- "UpdateEndpoint",
287
- name="UpdateEndpoint",
288
- after="ProcessBeforeEndpointUpdate",
289
- project=self.project,
290
- )
291
-
292
- apply_update_endpoint()
293
-
294
- # (only for V3IO KV target) - Apply infer_schema on the model endpoints table for generating schema file
295
- # which will be used by Grafana monitoring dashboards
296
- def apply_infer_schema():
297
- graph.add_step(
298
- "InferSchema",
299
- name="InferSchema",
300
- after="UpdateEndpoint",
301
- v3io_framesd=self.v3io_framesd,
302
- container=self.kv_container,
303
- table=self.kv_path,
304
- )
305
-
306
- if endpoint_store.type == ModelEndpointTarget.V3IO_NOSQL:
307
- apply_infer_schema()
308
-
309
- tsdb_connector.apply_monitoring_stream_steps(graph=graph)
212
+ tsdb_connector.apply_monitoring_stream_steps(
213
+ graph=graph,
214
+ aggregate_windows=self.aggregate_windows,
215
+ aggregate_period=self.aggregate_period,
216
+ )
310
217
 
311
218
  # Parquet branch
312
219
  # Filter and validate different keys before writing the data to Parquet target
@@ -342,91 +249,6 @@ class EventStreamProcessor:
342
249
  apply_parquet_target()
343
250
 
344
251
 
345
- class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
346
- def __init__(self, **kwargs):
347
- """
348
- Filter relevant keys from the event before writing the data to database table (in EndpointUpdate step).
349
- Note that in the endpoint table we only keep metadata (function_uri, model_class, etc.) and stats about the
350
- average latency and the number of predictions (per 5min and 1hour).
351
-
352
- :returns: A filtered event as a dictionary which will be written to the endpoint table in the next step.
353
- """
354
- super().__init__(**kwargs)
355
-
356
- def do(self, event):
357
- # Compute prediction per second
358
- event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
359
- float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
360
- )
361
- # Filter relevant keys
362
- e = {
363
- k: event[k]
364
- for k in [
365
- EventFieldType.FUNCTION_URI,
366
- EventFieldType.MODEL,
367
- EventFieldType.MODEL_CLASS,
368
- EventFieldType.ENDPOINT_ID,
369
- EventFieldType.LABELS,
370
- EventFieldType.FIRST_REQUEST,
371
- EventFieldType.LAST_REQUEST,
372
- EventFieldType.ERROR_COUNT,
373
- ]
374
- }
375
-
376
- # Add generic metrics statistics
377
- generic_metrics = {
378
- k: event[k]
379
- for k in [
380
- EventLiveStats.LATENCY_AVG_5M,
381
- EventLiveStats.LATENCY_AVG_1H,
382
- EventLiveStats.PREDICTIONS_PER_SECOND,
383
- EventLiveStats.PREDICTIONS_COUNT_5M,
384
- EventLiveStats.PREDICTIONS_COUNT_1H,
385
- ]
386
- }
387
-
388
- e[EventFieldType.METRICS] = json.dumps(
389
- {EventKeyMetrics.GENERIC: generic_metrics}
390
- )
391
-
392
- # Write labels as json string as required by the DB format
393
- e[EventFieldType.LABELS] = json.dumps(e[EventFieldType.LABELS])
394
-
395
- return e
396
-
397
-
398
- class ExtractEndpointID(mlrun.feature_store.steps.MapClass):
399
- def __init__(self, **kwargs) -> None:
400
- """
401
- Generate the model endpoint ID based on the event parameters and attach it to the event.
402
- """
403
- super().__init__(**kwargs)
404
-
405
- def do(self, full_event) -> typing.Union[storey.Event, None]:
406
- # Getting model version and function uri from event
407
- # and use them for retrieving the endpoint_id
408
- function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
409
- if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
410
- return None
411
-
412
- model = full_event.body.get(EventFieldType.MODEL)
413
- if not is_not_none(model, [EventFieldType.MODEL]):
414
- return None
415
-
416
- version = full_event.body.get(EventFieldType.VERSION)
417
- versioned_model = f"{model}:{version}" if version else f"{model}:latest"
418
-
419
- endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
420
- function_uri=function_uri,
421
- versioned_model=versioned_model,
422
- )
423
-
424
- endpoint_id = str(endpoint_id)
425
- full_event.body[EventFieldType.ENDPOINT_ID] = endpoint_id
426
- full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
427
- return full_event
428
-
429
-
430
252
  class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
431
253
  def __init__(self, **kwargs):
432
254
  """
@@ -499,20 +321,27 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
499
321
 
500
322
  def do(self, full_event):
501
323
  event = full_event.body
324
+ # Getting model version and function uri from event
325
+ # and use them for retrieving the endpoint_id
326
+ function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
327
+ if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
328
+ return None
329
+
330
+ model = full_event.body.get(EventFieldType.MODEL)
331
+ if not is_not_none(model, [EventFieldType.MODEL]):
332
+ return None
333
+
334
+ version = full_event.body.get(EventFieldType.VERSION)
335
+ versioned_model = f"{model}:{version}" if version else f"{model}:latest"
502
336
 
503
- versioned_model = event[EventFieldType.VERSIONED_MODEL]
337
+ full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
504
338
  endpoint_id = event[EventFieldType.ENDPOINT_ID]
505
- function_uri = event[EventFieldType.FUNCTION_URI]
506
339
 
507
340
  # In case this process fails, resume state from existing record
508
- self.resume_state(endpoint_id)
509
-
510
- # If error key has been found in the current event,
511
- # increase the error counter by 1 and raise the error description
512
- error = event.get("error")
513
- if error: # TODO: delete this in ML-7456
514
- self.error_count[endpoint_id] += 1
515
- raise mlrun.errors.MLRunInvalidArgumentError(str(error))
341
+ self.resume_state(
342
+ endpoint_id,
343
+ full_event.body.get(EventFieldType.MODEL),
344
+ )
516
345
 
517
346
  # Validate event fields
518
347
  model_class = event.get("model_class") or event.get("class")
@@ -536,11 +365,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
536
365
  # Set time for the first request of the current endpoint
537
366
  self.first_request[endpoint_id] = timestamp
538
367
 
539
- # Validate that the request time of the current event is later than the previous request time
540
- self._validate_last_request_timestamp(
541
- endpoint_id=endpoint_id, timestamp=timestamp
542
- )
543
-
544
368
  # Set time for the last reqeust of the current endpoint
545
369
  self.last_request[endpoint_id] = timestamp
546
370
 
@@ -610,6 +434,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
610
434
  {
611
435
  EventFieldType.FUNCTION_URI: function_uri,
612
436
  EventFieldType.MODEL: versioned_model,
437
+ EventFieldType.ENDPOINT_NAME: event.get(EventFieldType.MODEL),
613
438
  EventFieldType.MODEL_CLASS: model_class,
614
439
  EventFieldType.TIMESTAMP: timestamp,
615
440
  EventFieldType.ENDPOINT_ID: endpoint_id,
@@ -636,33 +461,19 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
636
461
  storey_event = storey.Event(body=events, key=endpoint_id)
637
462
  return storey_event
638
463
 
639
- def _validate_last_request_timestamp(self, endpoint_id: str, timestamp: str):
640
- """Validate that the request time of the current event is later than the previous request time that has
641
- already been processed.
642
-
643
- :param endpoint_id: The unique id of the model endpoint.
644
- :param timestamp: Event request time as a string.
645
-
646
- :raise MLRunPreconditionFailedError: If the request time of the current is later than the previous request time.
647
- """
648
-
649
- if (
650
- endpoint_id in self.last_request
651
- and self.last_request[endpoint_id] > timestamp
652
- ):
653
- logger.error(
654
- f"current event request time {timestamp} is earlier than the last request time "
655
- f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
656
- )
657
-
658
- def resume_state(self, endpoint_id):
464
+ def resume_state(self, endpoint_id, endpoint_name):
659
465
  # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
660
466
  # left them
661
467
  if endpoint_id not in self.endpoints:
662
468
  logger.info("Trying to resume state", endpoint_id=endpoint_id)
663
- endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
664
- project=self.project,
665
- endpoint_id=endpoint_id,
469
+ endpoint_record = (
470
+ mlrun.db.get_run_db()
471
+ .get_model_endpoint(
472
+ project=self.project,
473
+ endpoint_id=endpoint_id,
474
+ name=endpoint_name,
475
+ )
476
+ .flat_dict()
666
477
  )
667
478
 
668
479
  # If model endpoint found, get first_request, last_request and error_count values
@@ -736,6 +547,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
736
547
  # and labels columns were not found in the current event
737
548
  self.feature_names = {}
738
549
  self.label_columns = {}
550
+ self.first_request = {}
739
551
 
740
552
  # Dictionary to manage the model endpoint types - important for the V3IO TSDB
741
553
  self.endpoint_type = {}
@@ -767,17 +579,22 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
767
579
  if isinstance(feature_value, int):
768
580
  feature_values[index] = float(feature_value)
769
581
 
582
+ attributes_to_update = {}
583
+ endpoint_record = None
770
584
  # Get feature names and label columns
771
585
  if endpoint_id not in self.feature_names:
772
- endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
773
- project=self.project,
774
- endpoint_id=endpoint_id,
586
+ endpoint_record = (
587
+ mlrun.db.get_run_db()
588
+ .get_model_endpoint(
589
+ project=self.project,
590
+ endpoint_id=endpoint_id,
591
+ name=event[EventFieldType.ENDPOINT_NAME],
592
+ )
593
+ .flat_dict()
775
594
  )
776
595
  feature_names = endpoint_record.get(EventFieldType.FEATURE_NAMES)
777
- feature_names = json.loads(feature_names) if feature_names else None
778
596
 
779
597
  label_columns = endpoint_record.get(EventFieldType.LABEL_NAMES)
780
- label_columns = json.loads(label_columns) if label_columns else None
781
598
 
782
599
  # If feature names were not found,
783
600
  # try to retrieve them from the previous events of the current process
@@ -795,13 +612,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
795
612
  ]
796
613
 
797
614
  # Update the endpoint record with the generated features
798
- update_endpoint_record(
799
- project=self.project,
800
- endpoint_id=endpoint_id,
801
- attributes={
802
- EventFieldType.FEATURE_NAMES: json.dumps(feature_names)
803
- },
804
- )
615
+ attributes_to_update[EventFieldType.FEATURE_NAMES] = feature_names
805
616
 
806
617
  if endpoint_type != EndpointType.ROUTER.value:
807
618
  update_monitoring_feature_set(
@@ -822,12 +633,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
822
633
  label_columns = [
823
634
  f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
824
635
  ]
825
-
826
- update_endpoint_record(
827
- project=self.project,
828
- endpoint_id=endpoint_id,
829
- attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
830
- )
636
+ attributes_to_update[EventFieldType.LABEL_NAMES] = label_columns
831
637
  if endpoint_type != EndpointType.ROUTER.value:
832
638
  update_monitoring_feature_set(
833
639
  endpoint_record=endpoint_record,
@@ -848,6 +654,37 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
848
654
  # Update the endpoint type within the endpoint types dictionary
849
655
  self.endpoint_type[endpoint_id] = endpoint_type
850
656
 
657
+ # Update the first request time in the endpoint record
658
+ if endpoint_id not in self.first_request:
659
+ endpoint_record = endpoint_record or (
660
+ mlrun.db.get_run_db()
661
+ .get_model_endpoint(
662
+ project=self.project,
663
+ endpoint_id=endpoint_id,
664
+ name=event[EventFieldType.ENDPOINT_NAME],
665
+ )
666
+ .flat_dict()
667
+ )
668
+ if not endpoint_record.get(EventFieldType.FIRST_REQUEST):
669
+ attributes_to_update[EventFieldType.FIRST_REQUEST] = (
670
+ mlrun.utils.enrich_datetime_with_tz_info(
671
+ event[EventFieldType.FIRST_REQUEST]
672
+ )
673
+ )
674
+ self.first_request[endpoint_id] = True
675
+ if attributes_to_update:
676
+ logger.info(
677
+ "Updating endpoint record",
678
+ endpoint_id=endpoint_id,
679
+ attributes=attributes_to_update,
680
+ )
681
+ update_endpoint_record(
682
+ project=self.project,
683
+ endpoint_id=endpoint_id,
684
+ attributes=attributes_to_update,
685
+ endpoint_name=event[EventFieldType.ENDPOINT_NAME],
686
+ )
687
+
851
688
  # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
852
689
  feature_names = self.feature_names[endpoint_id]
853
690
  self._map_dictionary_values(
@@ -898,30 +735,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
898
735
  event[mapping_dictionary][name] = value
899
736
 
900
737
 
901
- class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
902
- def __init__(self, project: str, **kwargs):
903
- """
904
- Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
905
- the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
906
- such as "Model Monitoring - Performance" which can be found in Grafana.
907
-
908
- :returns: Event as a dictionary (without any changes) for the next step (InferSchema).
909
- """
910
- super().__init__(**kwargs)
911
- self.project = project
912
-
913
- def do(self, event: dict):
914
- # Remove labels from the event
915
- event.pop(EventFieldType.LABELS)
916
-
917
- update_endpoint_record(
918
- project=self.project,
919
- endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
920
- attributes=event,
921
- )
922
- return event
923
-
924
-
925
738
  class InferSchema(mlrun.feature_store.steps.MapClass):
926
739
  def __init__(
927
740
  self,
@@ -966,14 +779,14 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
966
779
  def update_endpoint_record(
967
780
  project: str,
968
781
  endpoint_id: str,
782
+ endpoint_name: str,
969
783
  attributes: dict,
970
784
  ):
971
- model_endpoint_store = mlrun.model_monitoring.get_store_object(
785
+ mlrun.db.get_run_db().patch_model_endpoint(
972
786
  project=project,
973
- )
974
-
975
- model_endpoint_store.update_model_endpoint(
976
- endpoint_id=endpoint_id, attributes=attributes
787
+ endpoint_id=endpoint_id,
788
+ attributes=attributes,
789
+ name=endpoint_name,
977
790
  )
978
791
 
979
792
 
@@ -21,7 +21,6 @@ import mlrun.common.schemas
21
21
  import mlrun.common.schemas.alert as alert_objects
22
22
  import mlrun.model_monitoring
23
23
  from mlrun.common.schemas.model_monitoring.constants import (
24
- EventFieldType,
25
24
  HistogramDataDriftApplicationConstants,
26
25
  MetricData,
27
26
  ResultData,
@@ -121,9 +120,6 @@ class ModelMonitoringWriter(StepToDict):
121
120
  notification_types=[NotificationKind.slack]
122
121
  )
123
122
 
124
- self._app_result_store = mlrun.model_monitoring.get_store_object(
125
- project=self.project, secret_provider=secret_provider
126
- )
127
123
  self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
128
124
  project=self.project, secret_provider=secret_provider
129
125
  )
@@ -266,14 +262,9 @@ class ModelMonitoringWriter(StepToDict):
266
262
  == ResultStatusApp.potential_detection.value
267
263
  )
268
264
  ):
269
- endpoint_id = event[WriterEvent.ENDPOINT_ID]
270
- endpoint_record = self._endpoints_records.setdefault(
271
- endpoint_id,
272
- self._app_result_store.get_model_endpoint(endpoint_id=endpoint_id),
273
- )
274
265
  event_value = {
275
266
  "app_name": event[WriterEvent.APPLICATION_NAME],
276
- "model": endpoint_record.get(EventFieldType.MODEL),
267
+ "model": event[WriterEvent.ENDPOINT_NAME],
277
268
  "model_endpoint_id": event[WriterEvent.ENDPOINT_ID],
278
269
  "result_name": event[ResultData.RESULT_NAME],
279
270
  "result_value": event[ResultData.RESULT_VALUE],