mlrun 1.7.0rc20__py3-none-any.whl → 1.7.0rc28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (92) hide show
  1. mlrun/__main__.py +10 -8
  2. mlrun/alerts/alert.py +55 -18
  3. mlrun/api/schemas/__init__.py +3 -3
  4. mlrun/artifacts/manager.py +26 -0
  5. mlrun/common/constants.py +3 -2
  6. mlrun/common/formatters/__init__.py +1 -0
  7. mlrun/common/formatters/artifact.py +26 -3
  8. mlrun/common/formatters/base.py +44 -9
  9. mlrun/common/formatters/function.py +12 -7
  10. mlrun/common/formatters/run.py +26 -0
  11. mlrun/common/helpers.py +11 -0
  12. mlrun/common/schemas/__init__.py +4 -0
  13. mlrun/common/schemas/alert.py +5 -9
  14. mlrun/common/schemas/api_gateway.py +64 -16
  15. mlrun/common/schemas/artifact.py +11 -0
  16. mlrun/common/schemas/constants.py +3 -0
  17. mlrun/common/schemas/feature_store.py +58 -28
  18. mlrun/common/schemas/model_monitoring/constants.py +21 -12
  19. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
  20. mlrun/common/schemas/pipeline.py +16 -0
  21. mlrun/common/schemas/project.py +17 -0
  22. mlrun/common/schemas/runs.py +17 -0
  23. mlrun/common/schemas/schedule.py +1 -1
  24. mlrun/common/types.py +6 -0
  25. mlrun/config.py +17 -25
  26. mlrun/datastore/azure_blob.py +2 -1
  27. mlrun/datastore/datastore.py +3 -3
  28. mlrun/datastore/google_cloud_storage.py +6 -2
  29. mlrun/datastore/snowflake_utils.py +3 -1
  30. mlrun/datastore/sources.py +26 -11
  31. mlrun/datastore/store_resources.py +2 -0
  32. mlrun/datastore/targets.py +68 -16
  33. mlrun/db/base.py +83 -2
  34. mlrun/db/httpdb.py +280 -63
  35. mlrun/db/nopdb.py +60 -3
  36. mlrun/errors.py +5 -3
  37. mlrun/execution.py +28 -13
  38. mlrun/feature_store/feature_vector.py +8 -0
  39. mlrun/feature_store/retrieval/spark_merger.py +13 -2
  40. mlrun/launcher/local.py +4 -0
  41. mlrun/launcher/remote.py +1 -0
  42. mlrun/model.py +32 -3
  43. mlrun/model_monitoring/api.py +7 -52
  44. mlrun/model_monitoring/applications/base.py +5 -7
  45. mlrun/model_monitoring/applications/histogram_data_drift.py +1 -1
  46. mlrun/model_monitoring/db/stores/__init__.py +37 -24
  47. mlrun/model_monitoring/db/stores/base/store.py +40 -1
  48. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +42 -87
  49. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +27 -35
  50. mlrun/model_monitoring/db/tsdb/__init__.py +15 -15
  51. mlrun/model_monitoring/db/tsdb/base.py +1 -14
  52. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +22 -18
  53. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +86 -56
  54. mlrun/model_monitoring/helpers.py +34 -9
  55. mlrun/model_monitoring/stream_processing.py +12 -11
  56. mlrun/model_monitoring/writer.py +11 -11
  57. mlrun/projects/operations.py +5 -0
  58. mlrun/projects/pipelines.py +35 -21
  59. mlrun/projects/project.py +216 -107
  60. mlrun/render.py +10 -5
  61. mlrun/run.py +15 -5
  62. mlrun/runtimes/__init__.py +2 -0
  63. mlrun/runtimes/base.py +17 -4
  64. mlrun/runtimes/daskjob.py +8 -1
  65. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  66. mlrun/runtimes/local.py +23 -4
  67. mlrun/runtimes/nuclio/application/application.py +0 -2
  68. mlrun/runtimes/nuclio/function.py +31 -2
  69. mlrun/runtimes/nuclio/serving.py +9 -6
  70. mlrun/runtimes/pod.py +5 -29
  71. mlrun/runtimes/remotesparkjob.py +8 -2
  72. mlrun/serving/__init__.py +8 -1
  73. mlrun/serving/routers.py +75 -59
  74. mlrun/serving/server.py +11 -0
  75. mlrun/serving/states.py +80 -8
  76. mlrun/serving/utils.py +19 -11
  77. mlrun/serving/v2_serving.py +66 -39
  78. mlrun/utils/helpers.py +91 -11
  79. mlrun/utils/logger.py +36 -2
  80. mlrun/utils/notifications/notification/base.py +43 -7
  81. mlrun/utils/notifications/notification/git.py +21 -0
  82. mlrun/utils/notifications/notification/slack.py +9 -14
  83. mlrun/utils/notifications/notification/webhook.py +41 -1
  84. mlrun/utils/notifications/notification_pusher.py +3 -9
  85. mlrun/utils/regex.py +9 -0
  86. mlrun/utils/version/version.json +2 -2
  87. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/METADATA +16 -9
  88. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/RECORD +92 -91
  89. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/WHEEL +1 -1
  90. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/LICENSE +0 -0
  91. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/entry_points.txt +0 -0
  92. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/top_level.txt +0 -0
@@ -57,41 +57,41 @@ class ObjectTSDBFactory(enum.Enum):
57
57
  :param value: Provided enum (invalid) value.
58
58
  """
59
59
  valid_values = list(cls.__members__.keys())
60
- raise mlrun.errors.MLRunInvalidArgumentError(
60
+ raise mlrun.errors.MLRunInvalidMMStoreType(
61
61
  f"{value} is not a valid tsdb, please choose a valid value: %{valid_values}."
62
62
  )
63
63
 
64
64
 
65
65
  def get_tsdb_connector(
66
66
  project: str,
67
- tsdb_connector_type: str = "",
68
- secret_provider: typing.Optional[typing.Callable] = None,
67
+ secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
68
+ tsdb_connection_string: typing.Optional[str] = None,
69
69
  **kwargs,
70
70
  ) -> TSDBConnector:
71
71
  """
72
72
  Get TSDB connector object.
73
- :param project: The name of the project.
74
- :param tsdb_connector_type: The type of the TSDB connector. See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory
75
- for available options.
76
- :param secret_provider: An optional secret provider to get the connection string secret.
73
+ :param project: The name of the project.
74
+ :param secret_provider: An optional secret provider to get the connection string secret.
75
+ :param tsdb_connection_string: An optional explicit connection string to the TSDB.
77
76
 
78
77
  :return: `TSDBConnector` object. The main goal of this object is to handle different operations on the
79
78
  TSDB connector such as updating drift metrics or write application record result.
80
79
  """
81
80
 
82
- tsdb_connection_string = mlrun.model_monitoring.helpers.get_tsdb_connection_string(
83
- secret_provider=secret_provider
81
+ tsdb_connection_string = (
82
+ tsdb_connection_string
83
+ or mlrun.model_monitoring.helpers.get_tsdb_connection_string(
84
+ secret_provider=secret_provider
85
+ )
84
86
  )
85
87
 
86
88
  if tsdb_connection_string and tsdb_connection_string.startswith("taosws"):
87
89
  tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.TDEngine
88
90
  kwargs["connection_string"] = tsdb_connection_string
89
-
90
- # Set the default TSDB connector type if no connection has been set
91
- tsdb_connector_type = (
92
- tsdb_connector_type
93
- or mlrun.mlconf.model_endpoint_monitoring.tsdb_connector_type
94
- )
91
+ elif tsdb_connection_string and tsdb_connection_string == "v3io":
92
+ tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.V3IO_TSDB
93
+ else:
94
+ tsdb_connector_type = None
95
95
 
96
96
  # Get connector type value from ObjectTSDBFactory enum class
97
97
  tsdb_connector_factory = ObjectTSDBFactory(tsdb_connector_type)
@@ -25,7 +25,7 @@ from mlrun.utils import logger
25
25
 
26
26
 
27
27
  class TSDBConnector(ABC):
28
- type: str = ""
28
+ type: typing.ClassVar[str]
29
29
 
30
30
  def __init__(self, project: str):
31
31
  """
@@ -177,19 +177,6 @@ class TSDBConnector(ABC):
177
177
  :return: Metric values object or no data object.
178
178
  """
179
179
 
180
- @abstractmethod
181
- def read_prediction_metric_for_endpoint_if_exists(
182
- self, endpoint_id: str
183
- ) -> typing.Optional[mm_schemas.ModelEndpointMonitoringMetric]:
184
- """
185
- Read the "invocations" metric for the provided model endpoint, and return the metric object
186
- if it exists.
187
-
188
- :param endpoint_id: The model endpoint identifier.
189
- :return: `None` if the invocations metric does not exist, otherwise return the
190
- corresponding metric object.
191
- """
192
-
193
180
  @staticmethod
194
181
  def df_to_metrics_values(
195
182
  *,
@@ -377,21 +377,25 @@ class TDEngineConnector(TSDBConnector):
377
377
  ), # pyright: ignore[reportArgumentType]
378
378
  )
379
379
 
380
- def read_prediction_metric_for_endpoint_if_exists(
381
- self, endpoint_id: str
382
- ) -> typing.Optional[mm_schemas.ModelEndpointMonitoringMetric]:
383
- # Read just one record, because we just want to check if there is any data for this endpoint_id
384
- predictions = self.read_predictions(
385
- endpoint_id=endpoint_id,
386
- start=datetime.min,
387
- end=mlrun.utils.now_date(),
388
- limit=1,
389
- )
390
- if predictions:
391
- return mm_schemas.ModelEndpointMonitoringMetric(
392
- project=self.project,
393
- app=mm_schemas.SpecialApps.MLRUN_INFRA,
394
- type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
395
- name=mm_schemas.PredictionsQueryConstants.INVOCATIONS,
396
- full_name=get_invocations_fqn(self.project),
397
- )
380
+ # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
381
+ #
382
+ # def read_prediction_metric_for_endpoint_if_exists(
383
+ # self, endpoint_id: str
384
+ # ) -> typing.Optional[mm_schemas.ModelEndpointMonitoringMetric]:
385
+ # """
386
+ # Read the "invocations" metric for the provided model endpoint, and return the metric object
387
+ # if it exists.
388
+ #
389
+ # :param endpoint_id: The model endpoint identifier.
390
+ # :return: `None` if the invocations metric does not exist, otherwise return the
391
+ # corresponding metric object.
392
+ # """
393
+ # # Read just one record, because we just want to check if there is any data for this endpoint_id
394
+ # predictions = self.read_predictions(
395
+ # endpoint_id=endpoint_id,
396
+ # start=datetime.min,
397
+ # end=mlrun.utils.now_date(),
398
+ # limit=1,
399
+ # )
400
+ # if predictions:
401
+ # return get_invocations_metric(self.project)
@@ -12,15 +12,13 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import typing
16
15
  from datetime import datetime
17
16
  from io import StringIO
18
17
  from typing import Literal, Optional, Union
19
18
 
20
19
  import pandas as pd
20
+ import v3io_frames
21
21
  import v3io_frames.client
22
- import v3io_frames.errors
23
- from v3io_frames.frames_pb2 import IGNORE
24
22
 
25
23
  import mlrun.common.model_monitoring
26
24
  import mlrun.common.schemas.model_monitoring as mm_schemas
@@ -35,6 +33,17 @@ _TSDB_RATE = "1/s"
35
33
  _CONTAINER = "users"
36
34
 
37
35
 
36
+ def _is_no_schema_error(exc: v3io_frames.ReadError) -> bool:
37
+ """
38
+ In case of a nonexistent TSDB table - a `v3io_frames.ReadError` error is raised.
39
+ Check if the error message contains the relevant string to verify the cause.
40
+ """
41
+ msg = str(exc)
42
+ # https://github.com/v3io/v3io-tsdb/blob/v0.14.1/pkg/tsdb/v3iotsdb.go#L205
43
+ # https://github.com/v3io/v3io-tsdb/blob/v0.14.1/pkg/partmgr/partmgr.go#L238
44
+ return "No TSDB schema file found" in msg or "Failed to read schema at path" in msg
45
+
46
+
38
47
  class V3IOTSDBConnector(TSDBConnector):
39
48
  """
40
49
  Handles the TSDB operations when the TSDB connector is of type V3IO. To manage these operations we use V3IO Frames
@@ -47,7 +56,7 @@ class V3IOTSDBConnector(TSDBConnector):
47
56
  self,
48
57
  project: str,
49
58
  container: str = _CONTAINER,
50
- v3io_framesd: typing.Optional[str] = None,
59
+ v3io_framesd: Optional[str] = None,
51
60
  create_table: bool = False,
52
61
  ) -> None:
53
62
  super().__init__(project=project)
@@ -132,7 +141,7 @@ class V3IOTSDBConnector(TSDBConnector):
132
141
  self._frames_client.create(
133
142
  backend=_TSDB_BE,
134
143
  table=table,
135
- if_exists=IGNORE,
144
+ if_exists=v3io_frames.IGNORE,
136
145
  rate=_TSDB_RATE,
137
146
  )
138
147
 
@@ -162,7 +171,7 @@ class V3IOTSDBConnector(TSDBConnector):
162
171
  time_col=mm_schemas.EventFieldType.TIMESTAMP,
163
172
  container=self.container,
164
173
  v3io_frames=self.v3io_framesd,
165
- columns=["latency"],
174
+ columns=[mm_schemas.EventFieldType.LATENCY],
166
175
  index_cols=[
167
176
  mm_schemas.EventFieldType.ENDPOINT_ID,
168
177
  ],
@@ -280,7 +289,7 @@ class V3IOTSDBConnector(TSDBConnector):
280
289
  index_cols=index_cols,
281
290
  )
282
291
  logger.info("Updated V3IO TSDB successfully", table=table)
283
- except v3io_frames.errors.Error as err:
292
+ except v3io_frames.Error as err:
284
293
  logger.exception(
285
294
  "Could not write drift measures to TSDB",
286
295
  err=err,
@@ -291,7 +300,7 @@ class V3IOTSDBConnector(TSDBConnector):
291
300
  f"Failed to write application result to TSDB: {err}"
292
301
  )
293
302
 
294
- def delete_tsdb_resources(self, table: typing.Optional[str] = None):
303
+ def delete_tsdb_resources(self, table: Optional[str] = None):
295
304
  if table:
296
305
  # Delete a specific table
297
306
  tables = [table]
@@ -301,7 +310,7 @@ class V3IOTSDBConnector(TSDBConnector):
301
310
  for table_to_delete in tables:
302
311
  try:
303
312
  self._frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
304
- except v3io_frames.errors.DeleteError as e:
313
+ except v3io_frames.DeleteError as e:
305
314
  logger.warning(
306
315
  f"Failed to delete TSDB table '{table}'",
307
316
  err=mlrun.errors.err_to_str(e),
@@ -362,7 +371,7 @@ class V3IOTSDBConnector(TSDBConnector):
362
371
  ]
363
372
  metrics_mapping[metric] = values
364
373
 
365
- except v3io_frames.errors.Error as err:
374
+ except v3io_frames.Error as err:
366
375
  logger.warn("Failed to read tsdb", err=err, endpoint=endpoint_id)
367
376
 
368
377
  return metrics_mapping
@@ -372,12 +381,11 @@ class V3IOTSDBConnector(TSDBConnector):
372
381
  table: str,
373
382
  start: Union[datetime, str],
374
383
  end: Union[datetime, str],
375
- columns: typing.Optional[list[str]] = None,
384
+ columns: Optional[list[str]] = None,
376
385
  filter_query: str = "",
377
- interval: typing.Optional[str] = None,
378
- agg_funcs: typing.Optional[list] = None,
379
- limit: typing.Optional[int] = None,
380
- sliding_window_step: typing.Optional[str] = None,
386
+ interval: Optional[str] = None,
387
+ agg_funcs: Optional[list[str]] = None,
388
+ sliding_window_step: Optional[str] = None,
381
389
  **kwargs,
382
390
  ) -> pd.DataFrame:
383
391
  """
@@ -400,7 +408,6 @@ class V3IOTSDBConnector(TSDBConnector):
400
408
  :param agg_funcs: The aggregation functions to apply on the columns. Note that if `agg_funcs` is
401
409
  provided, `interval` must be provided as well. Provided as a list of strings in
402
410
  the format of ['sum', 'avg', 'count', ...].
403
- :param limit: The maximum number of records to return.
404
411
  :param sliding_window_step: The time step for which the time window moves forward. Note that if
405
412
  `sliding_window_step` is provided, interval must be provided as well. Provided
406
413
  as a string in the format of '1m', '1h', etc.
@@ -414,25 +421,28 @@ class V3IOTSDBConnector(TSDBConnector):
414
421
  f"Available tables: {list(self.tables.keys())}"
415
422
  )
416
423
 
417
- if agg_funcs:
418
- # Frames client expects the aggregators to be a comma-separated string
419
- agg_funcs = ",".join(agg_funcs)
424
+ # Frames client expects the aggregators to be a comma-separated string
425
+ aggregators = ",".join(agg_funcs) if agg_funcs else None
420
426
  table_path = self.tables[table]
421
- df = self._frames_client.read(
422
- backend=_TSDB_BE,
423
- table=table_path,
424
- start=start,
425
- end=end,
426
- columns=columns,
427
- filter=filter_query,
428
- aggregation_window=interval,
429
- aggregators=agg_funcs,
430
- step=sliding_window_step,
431
- **kwargs,
432
- )
427
+ try:
428
+ df = self._frames_client.read(
429
+ backend=_TSDB_BE,
430
+ table=table_path,
431
+ start=start,
432
+ end=end,
433
+ columns=columns,
434
+ filter=filter_query,
435
+ aggregation_window=interval,
436
+ aggregators=aggregators,
437
+ step=sliding_window_step,
438
+ **kwargs,
439
+ )
440
+ except v3io_frames.ReadError as err:
441
+ if _is_no_schema_error(err):
442
+ return pd.DataFrame()
443
+ else:
444
+ raise err
433
445
 
434
- if limit:
435
- df = df.head(limit)
436
446
  return df
437
447
 
438
448
  def _get_v3io_source_directory(self) -> str:
@@ -503,8 +513,8 @@ class V3IOTSDBConnector(TSDBConnector):
503
513
  raise ValueError(f"Invalid {type = }")
504
514
 
505
515
  query = self._get_sql_query(
506
- endpoint_id,
507
- [(metric.app, metric.name) for metric in metrics],
516
+ endpoint_id=endpoint_id,
517
+ metric_and_app_names=[(metric.app, metric.name) for metric in metrics],
508
518
  table_path=table_path,
509
519
  name=name,
510
520
  )
@@ -530,21 +540,28 @@ class V3IOTSDBConnector(TSDBConnector):
530
540
 
531
541
  @staticmethod
532
542
  def _get_sql_query(
543
+ *,
533
544
  endpoint_id: str,
534
- names: list[tuple[str, str]],
535
545
  table_path: str,
536
546
  name: str = mm_schemas.ResultData.RESULT_NAME,
547
+ metric_and_app_names: Optional[list[tuple[str, str]]] = None,
548
+ columns: Optional[list[str]] = None,
537
549
  ) -> str:
538
550
  """Get the SQL query for the results/metrics table"""
551
+ if columns:
552
+ selection = ",".join(columns)
553
+ else:
554
+ selection = "*"
555
+
539
556
  with StringIO() as query:
540
557
  query.write(
541
- f"SELECT * FROM '{table_path}' "
558
+ f"SELECT {selection} FROM '{table_path}' "
542
559
  f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
543
560
  )
544
- if names:
561
+ if metric_and_app_names:
545
562
  query.write(" AND (")
546
563
 
547
- for i, (app_name, result_name) in enumerate(names):
564
+ for i, (app_name, result_name) in enumerate(metric_and_app_names):
548
565
  sub_cond = (
549
566
  f"({mm_schemas.WriterEvent.APPLICATION_NAME}='{app_name}' "
550
567
  f"AND {name}='{result_name}')"
@@ -566,7 +583,6 @@ class V3IOTSDBConnector(TSDBConnector):
566
583
  end: Union[datetime, str],
567
584
  aggregation_window: Optional[str] = None,
568
585
  agg_funcs: Optional[list[str]] = None,
569
- limit: Optional[int] = None,
570
586
  ) -> Union[
571
587
  mm_schemas.ModelEndpointMonitoringMetricNoData,
572
588
  mm_schemas.ModelEndpointMonitoringMetricValues,
@@ -585,7 +601,6 @@ class V3IOTSDBConnector(TSDBConnector):
585
601
  filter_query=f"endpoint_id=='{endpoint_id}'",
586
602
  interval=aggregation_window,
587
603
  agg_funcs=agg_funcs,
588
- limit=limit,
589
604
  sliding_window_step=aggregation_window,
590
605
  )
591
606
 
@@ -613,18 +628,33 @@ class V3IOTSDBConnector(TSDBConnector):
613
628
  ), # pyright: ignore[reportArgumentType]
614
629
  )
615
630
 
616
- def read_prediction_metric_for_endpoint_if_exists(
617
- self, endpoint_id: str
618
- ) -> Optional[mm_schemas.ModelEndpointMonitoringMetric]:
619
- # Read just one record, because we just want to check if there is any data for this endpoint_id
620
- predictions = self.read_predictions(
621
- endpoint_id=endpoint_id, start="0", end="now", limit=1
622
- )
623
- if predictions:
624
- return mm_schemas.ModelEndpointMonitoringMetric(
625
- project=self.project,
626
- app=mm_schemas.SpecialApps.MLRUN_INFRA,
627
- type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
628
- name=mm_schemas.PredictionsQueryConstants.INVOCATIONS,
629
- full_name=get_invocations_fqn(self.project),
630
- )
631
+ # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
632
+ #
633
+ # def read_prediction_metric_for_endpoint_if_exists(
634
+ # self, endpoint_id: str
635
+ # ) -> Optional[mm_schemas.ModelEndpointMonitoringMetric]:
636
+ # """
637
+ # Read the count of the latency column in the predictions table for the given endpoint_id.
638
+ # We just want to check if there is any data for this endpoint_id.
639
+ # """
640
+ # query = self._get_sql_query(
641
+ # endpoint_id=endpoint_id,
642
+ # table_path=self.tables[mm_schemas.FileTargetKind.PREDICTIONS],
643
+ # columns=[f"count({mm_schemas.EventFieldType.LATENCY})"],
644
+ # )
645
+ # try:
646
+ # logger.debug("Checking TSDB", project=self.project, query=query)
647
+ # df: pd.DataFrame = self._frames_client.read(
648
+ # backend=_TSDB_BE, query=query, start="0", end="now"
649
+ # )
650
+ # except v3io_frames.ReadError as err:
651
+ # if _is_no_schema_error(err):
652
+ # logger.debug(
653
+ # "No predictions yet", project=self.project, endpoint_id=endpoint_id
654
+ # )
655
+ # return
656
+ # else:
657
+ # raise
658
+ #
659
+ # if not df.empty:
660
+ # return get_invocations_metric(self.project)
@@ -25,6 +25,7 @@ from mlrun.common.schemas.model_monitoring import (
25
25
  EventFieldType,
26
26
  )
27
27
  from mlrun.common.schemas.model_monitoring.model_endpoints import (
28
+ ModelEndpointMonitoringMetric,
28
29
  ModelEndpointMonitoringMetricType,
29
30
  _compose_full_name,
30
31
  )
@@ -58,13 +59,17 @@ def get_stream_path(
58
59
 
59
60
  stream_uri = mlrun.get_secret_or_env(
60
61
  mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
61
- ) or mlrun.mlconf.get_model_monitoring_file_target_path(
62
- project=project,
63
- kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
64
- target="online",
65
- function_name=function_name,
66
62
  )
67
63
 
64
+ if not stream_uri or stream_uri == "v3io":
65
+ # TODO : remove the first part of this condition in 1.9.0
66
+ stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
67
+ project=project,
68
+ kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
69
+ target="online",
70
+ function_name=function_name,
71
+ )
72
+
68
73
  if isinstance(stream_uri, list): # ML-6043 - user side gets only the new stream uri
69
74
  stream_uri = stream_uri[1] # get new stream path, under projects
70
75
  return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
@@ -96,7 +101,7 @@ def get_monitoring_parquet_path(
96
101
  return parquet_path
97
102
 
98
103
 
99
- def get_connection_string(secret_provider: typing.Callable = None) -> str:
104
+ def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -> str:
100
105
  """Get endpoint store connection string from the project secret. If wasn't set, take it from the system
101
106
  configurations.
102
107
 
@@ -116,7 +121,7 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
116
121
 
117
122
 
118
123
  def get_tsdb_connection_string(
119
- secret_provider: typing.Optional[typing.Callable] = None,
124
+ secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
120
125
  ) -> str:
121
126
  """Get TSDB connection string from the project secret. If wasn't set, take it from the system
122
127
  configurations.
@@ -277,9 +282,13 @@ def calculate_inputs_statistics(
277
282
  return inputs_statistics
278
283
 
279
284
 
280
- def get_endpoint_record(project: str, endpoint_id: str):
285
+ def get_endpoint_record(
286
+ project: str,
287
+ endpoint_id: str,
288
+ secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
289
+ ) -> dict[str, typing.Any]:
281
290
  model_endpoint_store = mlrun.model_monitoring.get_store_object(
282
- project=project,
291
+ project=project, secret_provider=secret_provider
283
292
  )
284
293
  return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
285
294
 
@@ -305,3 +314,19 @@ def get_invocations_fqn(project: str) -> str:
305
314
  name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
306
315
  type=ModelEndpointMonitoringMetricType.METRIC,
307
316
  )
317
+
318
+
319
+ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
320
+ """
321
+ Return the invocations metric of any model endpoint in the given project.
322
+
323
+ :param project: The project name.
324
+ :returns: The model monitoring metric object.
325
+ """
326
+ return ModelEndpointMonitoringMetric(
327
+ project=project,
328
+ app=mm_constants.SpecialApps.MLRUN_INFRA,
329
+ type=ModelEndpointMonitoringMetricType.METRIC,
330
+ name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
331
+ full_name=get_invocations_fqn(project),
332
+ )
@@ -66,10 +66,6 @@ class EventStreamProcessor:
66
66
  self.parquet_batching_max_events = parquet_batching_max_events
67
67
  self.parquet_batching_timeout_secs = parquet_batching_timeout_secs
68
68
 
69
- self.model_endpoint_store_target = (
70
- mlrun.mlconf.model_endpoint_monitoring.store_type
71
- )
72
-
73
69
  logger.info(
74
70
  "Initializing model monitoring event stream processor",
75
71
  parquet_path=self.parquet_path,
@@ -139,7 +135,7 @@ class EventStreamProcessor:
139
135
  def apply_monitoring_serving_graph(
140
136
  self,
141
137
  fn: mlrun.runtimes.ServingRuntime,
142
- tsdb_service_provider: typing.Optional[typing.Callable] = None,
138
+ secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
143
139
  ) -> None:
144
140
  """
145
141
  Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -167,7 +163,8 @@ class EventStreamProcessor:
167
163
  using CE, the parquet target path is based on the defined MLRun artifact path.
168
164
 
169
165
  :param fn: A serving function.
170
- :param tsdb_service_provider: An optional callable function that provides the TSDB connection string.
166
+ :param secret_provider: An optional callable function that provides the connection string from the project
167
+ secret.
171
168
  """
172
169
 
173
170
  graph = typing.cast(
@@ -293,7 +290,6 @@ class EventStreamProcessor:
293
290
  name="UpdateEndpoint",
294
291
  after="ProcessBeforeEndpointUpdate",
295
292
  project=self.project,
296
- model_endpoint_store_target=self.model_endpoint_store_target,
297
293
  )
298
294
 
299
295
  apply_update_endpoint()
@@ -310,7 +306,10 @@ class EventStreamProcessor:
310
306
  table=self.kv_path,
311
307
  )
312
308
 
313
- if self.model_endpoint_store_target == ModelEndpointTarget.V3IO_NOSQL:
309
+ store_object = mlrun.model_monitoring.get_store_object(
310
+ project=self.project, secret_provider=secret_provider
311
+ )
312
+ if store_object.type == ModelEndpointTarget.V3IO_NOSQL:
314
313
  apply_infer_schema()
315
314
 
316
315
  # Emits the event in window size of events based on sample_window size (10 by default)
@@ -328,7 +327,7 @@ class EventStreamProcessor:
328
327
  # TSDB branch (skip to Prometheus if in CE env)
329
328
  if not mlrun.mlconf.is_ce_mode():
330
329
  tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
331
- project=self.project, secret_provider=tsdb_service_provider
330
+ project=self.project, secret_provider=secret_provider
332
331
  )
333
332
  tsdb_connector.apply_monitoring_stream_steps(graph=graph)
334
333
 
@@ -904,7 +903,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
904
903
 
905
904
 
906
905
  class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
907
- def __init__(self, project: str, model_endpoint_store_target: str, **kwargs):
906
+ def __init__(self, project: str, **kwargs):
908
907
  """
909
908
  Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
910
909
  the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
@@ -914,9 +913,11 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
914
913
  """
915
914
  super().__init__(**kwargs)
916
915
  self.project = project
917
- self.model_endpoint_store_target = model_endpoint_store_target
918
916
 
919
917
  def do(self, event: dict):
918
+ # Remove labels from the event
919
+ event.pop(EventFieldType.LABELS)
920
+
920
921
  update_endpoint_record(
921
922
  project=self.project,
922
923
  endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import json
16
- from typing import Any, NewType
16
+ from typing import Any, Callable, NewType
17
17
 
18
18
  import mlrun.common.model_monitoring
19
19
  import mlrun.common.schemas
@@ -30,7 +30,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
30
30
  WriterEventKind,
31
31
  )
32
32
  from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
33
- from mlrun.model_monitoring.helpers import get_endpoint_record, get_result_instance_fqn
33
+ from mlrun.model_monitoring.helpers import get_result_instance_fqn
34
34
  from mlrun.serving.utils import StepToDict
35
35
  from mlrun.utils import logger
36
36
  from mlrun.utils.notifications.notification_pusher import CustomNotificationPusher
@@ -102,7 +102,11 @@ class ModelMonitoringWriter(StepToDict):
102
102
 
103
103
  kind = "monitoring_application_stream_pusher"
104
104
 
105
- def __init__(self, project: str, tsdb_secret_provider=None) -> None:
105
+ def __init__(
106
+ self,
107
+ project: str,
108
+ secret_provider: Callable = None,
109
+ ) -> None:
106
110
  self.project = project
107
111
  self.name = project # required for the deployment process
108
112
 
@@ -111,10 +115,10 @@ class ModelMonitoringWriter(StepToDict):
111
115
  )
112
116
 
113
117
  self._app_result_store = mlrun.model_monitoring.get_store_object(
114
- project=self.project
118
+ project=self.project, secret_provider=secret_provider
115
119
  )
116
120
  self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
117
- project=self.project, secret_provider=tsdb_secret_provider
121
+ project=self.project, secret_provider=secret_provider
118
122
  )
119
123
  self._endpoints_records = {}
120
124
 
@@ -149,11 +153,7 @@ class ModelMonitoringWriter(StepToDict):
149
153
  result_kind: int, result_status: int
150
154
  ) -> alert_objects.EventKind:
151
155
  """Generate the required Event Kind format for the alerting system"""
152
- if result_kind == ResultKindApp.custom.value:
153
- # Custom kind is represented as an anomaly detection
154
- event_kind = "mm_app_anomaly"
155
- else:
156
- event_kind = ResultKindApp(value=result_kind).name
156
+ event_kind = ResultKindApp(value=result_kind).name
157
157
 
158
158
  if result_status == ResultStatusApp.detected.value:
159
159
  event_kind = f"{event_kind}_detected"
@@ -223,7 +223,7 @@ class ModelMonitoringWriter(StepToDict):
223
223
  endpoint_id = event[WriterEvent.ENDPOINT_ID]
224
224
  endpoint_record = self._endpoints_records.setdefault(
225
225
  endpoint_id,
226
- get_endpoint_record(project=self.project, endpoint_id=endpoint_id),
226
+ self._app_result_store.get_model_endpoint(endpoint_id=endpoint_id),
227
227
  )
228
228
  event_value = {
229
229
  "app_name": event[WriterEvent.APPLICATION_NAME],
@@ -77,6 +77,7 @@ def run_function(
77
77
  notifications: list[mlrun.model.Notification] = None,
78
78
  returns: Optional[list[Union[str, dict[str, str]]]] = None,
79
79
  builder_env: Optional[list] = None,
80
+ reset_on_run: Optional[bool] = None,
80
81
  ) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
81
82
  """Run a local or remote task as part of a local/kubeflow pipeline
82
83
 
@@ -167,6 +168,9 @@ def run_function(
167
168
  artifact type can be given there. The artifact key must appear in the dictionary as
168
169
  "key": "the_key".
169
170
  :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
171
+ :param reset_on_run: When True, function python modules would reload prior to code execution.
172
+ This ensures latest code changes are executed. This argument must be used in
173
+ conjunction with the local=True argument.
170
174
  :return: MLRun RunObject or PipelineNodeWrapper
171
175
  """
172
176
  engine, function = _get_engine_and_function(function, project_object)
@@ -215,6 +219,7 @@ def run_function(
215
219
  schedule=schedule,
216
220
  notifications=notifications,
217
221
  builder_env=builder_env,
222
+ reset_on_run=reset_on_run,
218
223
  )
219
224
  if run_result:
220
225
  run_result._notified = False