mlrun 1.8.0rc21__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (47) hide show
  1. mlrun/__init__.py +37 -3
  2. mlrun/alerts/alert.py +1 -0
  3. mlrun/artifacts/document.py +78 -36
  4. mlrun/common/formatters/feature_set.py +1 -0
  5. mlrun/common/schemas/alert.py +3 -0
  6. mlrun/common/schemas/client_spec.py +0 -1
  7. mlrun/common/schemas/model_monitoring/constants.py +27 -9
  8. mlrun/common/schemas/workflow.py +1 -0
  9. mlrun/config.py +39 -6
  10. mlrun/datastore/datastore_profile.py +58 -16
  11. mlrun/datastore/sources.py +7 -1
  12. mlrun/datastore/vectorstore.py +20 -1
  13. mlrun/db/base.py +11 -0
  14. mlrun/db/httpdb.py +21 -9
  15. mlrun/db/nopdb.py +10 -0
  16. mlrun/errors.py +4 -0
  17. mlrun/execution.py +15 -6
  18. mlrun/launcher/client.py +2 -2
  19. mlrun/launcher/local.py +5 -1
  20. mlrun/model_monitoring/applications/_application_steps.py +3 -1
  21. mlrun/model_monitoring/controller.py +266 -103
  22. mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
  23. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +5 -2
  24. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +8 -8
  25. mlrun/model_monitoring/helpers.py +16 -10
  26. mlrun/model_monitoring/stream_processing.py +85 -35
  27. mlrun/package/context_handler.py +1 -1
  28. mlrun/package/packagers_manager.py +4 -18
  29. mlrun/projects/pipelines.py +2 -2
  30. mlrun/projects/project.py +123 -38
  31. mlrun/runtimes/nuclio/serving.py +2 -2
  32. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  33. mlrun/secrets.py +1 -1
  34. mlrun/serving/server.py +11 -3
  35. mlrun/serving/states.py +65 -8
  36. mlrun/serving/v2_serving.py +16 -8
  37. mlrun/utils/helpers.py +81 -21
  38. mlrun/utils/notifications/notification/base.py +6 -1
  39. mlrun/utils/notifications/notification/slack.py +5 -1
  40. mlrun/utils/notifications/notification_pusher.py +13 -4
  41. mlrun/utils/version/version.json +2 -2
  42. {mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
  43. {mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +47 -47
  44. {mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
  45. {mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
  46. {mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
  47. {mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py CHANGED
@@ -29,6 +29,7 @@ import zipfile
29
29
  from copy import deepcopy
30
30
  from os import environ, makedirs, path
31
31
  from typing import Callable, Optional, Union, cast
32
+ from urllib.parse import urlparse
32
33
 
33
34
  import dotenv
34
35
  import git
@@ -1912,7 +1913,7 @@ class MlrunProject(ModelObj):
1912
1913
 
1913
1914
  def log_document(
1914
1915
  self,
1915
- key: str,
1916
+ key: str = "",
1916
1917
  tag: str = "",
1917
1918
  local_path: str = "",
1918
1919
  artifact_path: Optional[str] = None,
@@ -1925,7 +1926,8 @@ class MlrunProject(ModelObj):
1925
1926
  """
1926
1927
  Log a document as an artifact.
1927
1928
 
1928
- :param key: Artifact key
1929
+ :param key: Optional artifact key. If not provided, will be derived from local_path
1930
+ or target_path using DocumentArtifact.key_from_source()
1929
1931
  :param tag: Version tag
1930
1932
  :param local_path: path to the local file we upload, will also be use
1931
1933
  as the destination subpath (under "artifact_path")
@@ -1954,7 +1956,6 @@ class MlrunProject(ModelObj):
1954
1956
  Example:
1955
1957
  >>> # Log a PDF document with custom loader
1956
1958
  >>> project.log_document(
1957
- ... key="my_doc",
1958
1959
  ... local_path="path/to/doc.pdf",
1959
1960
  ... document_loader=DocumentLoaderSpec(
1960
1961
  ... loader_class_name="langchain_community.document_loaders.PDFLoader",
@@ -1964,17 +1965,24 @@ class MlrunProject(ModelObj):
1964
1965
  ... )
1965
1966
 
1966
1967
  """
1968
+ if not key and not local_path and not target_path:
1969
+ raise ValueError(
1970
+ "Must provide either 'key' parameter or 'local_path'/'target_path' to derive the key from"
1971
+ )
1972
+ if not key:
1973
+ key = DocumentArtifact.key_from_source(local_path or target_path)
1974
+
1975
+ document_loader_spec = document_loader_spec or DocumentLoaderSpec()
1967
1976
  if not document_loader_spec.download_object and upload:
1968
1977
  raise ValueError(
1969
- "This document loader expects direct links/URLs and does not support file uploads. "
1970
- "Either set download_object=True or set upload=False"
1978
+ "The document loader is configured to not support downloads but the upload flag is set to True. "
1979
+ "Either set loader.download_object=True or set upload=False"
1971
1980
  )
1972
1981
  doc_artifact = DocumentArtifact(
1973
1982
  key=key,
1974
1983
  original_source=local_path or target_path,
1975
- document_loader_spec=document_loader_spec
1976
- if document_loader_spec
1977
- else DocumentLoaderSpec(),
1984
+ document_loader_spec=document_loader_spec,
1985
+ collections=kwargs.pop("collections", None),
1978
1986
  **kwargs,
1979
1987
  )
1980
1988
  return self.log_artifact(
@@ -2118,8 +2126,9 @@ class MlrunProject(ModelObj):
2118
2126
  """
2119
2127
  :param name: AlertConfig name.
2120
2128
  :param summary: Summary of the alert, will be sent in the generated notifications
2121
- :param endpoints: The endpoints from which to retrieve the metrics that the
2122
- alerts will be based on.
2129
+ :param endpoints: The endpoints from which metrics will be retrieved to configure the alerts.
2130
+ This `ModelEndpointList` object is obtained via the `list_model_endpoints`
2131
+ method or created manually using `ModelEndpoint` objects.
2123
2132
  :param events: AlertTrigger event types (EventKind).
2124
2133
  :param notifications: List of notifications to invoke once the alert is triggered
2125
2134
  :param result_names: Optional. Filters the result names used to create the alert configuration,
@@ -2128,6 +2137,8 @@ class MlrunProject(ModelObj):
2128
2137
  For example:
2129
2138
  [`app1.result-*`, `*.result1`]
2130
2139
  will match "mep1.app1.result.result-1" and "mep1.app2.result.result1".
2140
+ A specific result_name (not a wildcard) will always create a new alert
2141
+ config, regardless of whether the result name exists.
2131
2142
  :param severity: Severity of the alert.
2132
2143
  :param criteria: When the alert will be triggered based on the
2133
2144
  specified number of events within the defined time period.
@@ -2138,6 +2149,11 @@ class MlrunProject(ModelObj):
2138
2149
  """
2139
2150
  db = mlrun.db.get_run_db(secrets=self._secrets)
2140
2151
  matching_results = []
2152
+ specific_result_names = [
2153
+ result_name
2154
+ for result_name in result_names
2155
+ if result_name.count(".") == 3 and "*" not in result_name
2156
+ ]
2141
2157
  alerts = []
2142
2158
  endpoint_ids = [endpoint.metadata.uid for endpoint in endpoints.endpoints]
2143
2159
  # using separation to group by endpoint IDs:
@@ -2161,7 +2177,14 @@ class MlrunProject(ModelObj):
2161
2177
  existing_result_names=results_fqn_by_endpoint,
2162
2178
  result_name_filters=result_names,
2163
2179
  )
2164
- for result_fqn in matching_results:
2180
+ for specific_result_name in specific_result_names:
2181
+ if specific_result_name not in matching_results:
2182
+ logger.warning(
2183
+ f"The specific result name '{specific_result_name}' was"
2184
+ f" not found in the existing endpoint results. Adding alert configuration anyway."
2185
+ )
2186
+ alert_result_names = list(set(specific_result_names + matching_results))
2187
+ for result_fqn in alert_result_names:
2165
2188
  alerts.append(
2166
2189
  mlrun.alerts.alert.AlertConfig(
2167
2190
  project=self.name,
@@ -3608,9 +3631,12 @@ class MlrunProject(ModelObj):
3608
3631
  def set_model_monitoring_credentials(
3609
3632
  self,
3610
3633
  access_key: Optional[str] = None,
3611
- stream_path: Optional[str] = None,
3612
- tsdb_connection: Optional[str] = None,
3634
+ stream_path: Optional[str] = None, # Deprecated
3635
+ tsdb_connection: Optional[str] = None, # Deprecated
3613
3636
  replace_creds: bool = False,
3637
+ *,
3638
+ stream_profile_name: Optional[str] = None,
3639
+ tsdb_profile_name: Optional[str] = None,
3614
3640
  ):
3615
3641
  """
3616
3642
  Set the credentials that will be used by the project's model monitoring
@@ -3622,50 +3648,109 @@ class MlrunProject(ModelObj):
3622
3648
  * None - will be set from the system configuration.
3623
3649
  * v3io - for v3io endpoint store, pass `v3io` and the system will generate the
3624
3650
  exact path.
3625
- :param stream_path: Path to the model monitoring stream. By default, None. Options:
3626
-
3627
- * None - will be set from the system configuration.
3628
- * v3io - for v3io stream, pass `v3io` and the system will generate the exact
3629
- path.
3630
- * Kafka - for Kafka stream, provide the full connection string without custom
3631
- topic, for example kafka://<some_kafka_broker>:<port>.
3632
- :param tsdb_connection: Connection string to the time series database. By default, None.
3651
+ :param stream_path: (Deprecated) This argument is deprecated. Use ``stream_profile_name`` instead.
3652
+ Path to the model monitoring stream. By default, None. Options:
3653
+
3654
+ * ``"v3io"`` - for v3io stream, pass ``"v3io"`` and the system will generate
3655
+ the exact path.
3656
+ * Kafka - for Kafka stream, provide the full connection string without a custom
3657
+ topic, for example ``"kafka://<some_kafka_broker>:<port>"``.
3658
+ :param tsdb_connection: (Deprecated) Connection string to the time series database. By default, None.
3633
3659
  Options:
3634
3660
 
3635
- * None - will be set from the system configuration.
3636
- * v3io - for v3io stream, pass `v3io` and the system will generate the exact
3637
- path.
3661
+ * v3io - for v3io stream, pass ``"v3io"`` and the system will generate the
3662
+ exact path.
3638
3663
  * TDEngine - for TDEngine tsdb, provide the full websocket connection URL,
3639
- for example taosws://<username>:<password>@<host>:<port>.
3664
+ for example ``"taosws://<username>:<password>@<host>:<port>"``.
3640
3665
  :param replace_creds: If True, will override the existing credentials.
3641
3666
  Please keep in mind that if you already enabled model monitoring on
3642
3667
  your project this action can cause data loss and will require redeploying
3643
3668
  all model monitoring functions & model monitoring infra
3644
3669
  & tracked model server.
3670
+ :param stream_profile_name: The datastore profile name of the stream to be used in model monitoring.
3671
+ The supported profiles are:
3672
+
3673
+ * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
3674
+ * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource`
3675
+
3676
+ You need to register one of them, and pass the profile's name.
3677
+ :param tsdb_profile_name: The datastore profile name of the time-series database to be used in model
3678
+ monitoring. The supported profiles are:
3679
+
3680
+ * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
3681
+ * :py:class:`~mlrun.datastore.datastore_profile.TDEngineDatastoreProfile`
3682
+
3683
+ You need to register one of them, and pass the profile's name.
3645
3684
  """
3646
3685
  db = mlrun.db.get_run_db(secrets=self._secrets)
3647
- if tsdb_connection == "v3io":
3648
- tsdb_profile = mlrun.datastore.datastore_profile.DatastoreProfileV3io(
3649
- name="mm-infra-tsdb"
3686
+
3687
+ if tsdb_connection:
3688
+ warnings.warn(
3689
+ "The `tsdb_connection` argument is deprecated and will be removed in MLRun version 1.8.0. "
3690
+ "Use `tsdb_profile_name` instead.",
3691
+ FutureWarning,
3650
3692
  )
3693
+ if tsdb_profile_name:
3694
+ raise mlrun.errors.MLRunValueError(
3695
+ "If you set `tsdb_profile_name`, you must not pass `tsdb_connection`."
3696
+ )
3697
+ if tsdb_connection == "v3io":
3698
+ tsdb_profile = mlrun.datastore.datastore_profile.DatastoreProfileV3io(
3699
+ name=mm_constants.DefaultProfileName.TSDB
3700
+ )
3701
+ else:
3702
+ parsed_url = urlparse(tsdb_connection)
3703
+ if parsed_url.scheme != "taosws":
3704
+ raise mlrun.errors.MLRunValueError(
3705
+ f"Unsupported `tsdb_connection`: '{tsdb_connection}'."
3706
+ )
3707
+ tsdb_profile = (
3708
+ mlrun.datastore.datastore_profile.TDEngineDatastoreProfile(
3709
+ name=mm_constants.DefaultProfileName.TSDB,
3710
+ user=parsed_url.username,
3711
+ password=parsed_url.password,
3712
+ host=parsed_url.hostname,
3713
+ port=parsed_url.port,
3714
+ )
3715
+ )
3716
+
3651
3717
  self.register_datastore_profile(tsdb_profile)
3652
3718
  tsdb_profile_name = tsdb_profile.name
3653
- else:
3654
- tsdb_profile_name = None
3655
- if stream_path == "v3io":
3656
- stream_profile = mlrun.datastore.datastore_profile.DatastoreProfileV3io(
3657
- name="mm-infra-stream"
3719
+
3720
+ if stream_path:
3721
+ warnings.warn(
3722
+ "The `stream_path` argument is deprecated and will be removed in MLRun version 1.8.0. "
3723
+ "Use `stream_profile_name` instead.",
3724
+ FutureWarning,
3658
3725
  )
3726
+ if stream_profile_name:
3727
+ raise mlrun.errors.MLRunValueError(
3728
+ "If you set `stream_profile_name`, you must not pass `stream_path`."
3729
+ )
3730
+ if stream_path == "v3io":
3731
+ stream_profile = mlrun.datastore.datastore_profile.DatastoreProfileV3io(
3732
+ name=mm_constants.DefaultProfileName.STREAM
3733
+ )
3734
+ else:
3735
+ parsed_stream = urlparse(stream_path)
3736
+ if parsed_stream.scheme != "kafka":
3737
+ raise mlrun.errors.MLRunValueError(
3738
+ f"Unsupported `stream_path`: '{stream_path}'."
3739
+ )
3740
+ stream_profile = (
3741
+ mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource(
3742
+ name=mm_constants.DefaultProfileName.STREAM,
3743
+ brokers=[parsed_stream.netloc],
3744
+ topics=[],
3745
+ )
3746
+ )
3659
3747
  self.register_datastore_profile(stream_profile)
3660
3748
  stream_profile_name = stream_profile.name
3661
- else:
3662
- stream_profile_name = None
3749
+
3663
3750
  db.set_model_monitoring_credentials(
3664
3751
  project=self.name,
3665
3752
  credentials={
3666
3753
  "access_key": access_key,
3667
- "stream_path": stream_path,
3668
- "tsdb_connection": tsdb_connection,
3669
3754
  "tsdb_profile_name": tsdb_profile_name,
3670
3755
  "stream_profile_name": stream_profile_name,
3671
3756
  },
@@ -3676,7 +3761,7 @@ class MlrunProject(ModelObj):
3676
3761
  "Model monitoring credentials were set successfully. "
3677
3762
  "Please keep in mind that if you already had model monitoring functions "
3678
3763
  "/ model monitoring infra / tracked model server "
3679
- "deployed on your project, you will need to redeploy them."
3764
+ "deployed on your project, you will need to redeploy them. "
3680
3765
  "For redeploying the model monitoring infra, please use `enable_model_monitoring` API "
3681
3766
  "and set `rebuild_images=True`"
3682
3767
  )
@@ -688,7 +688,7 @@ class ServingRuntime(RemoteRuntime):
688
688
  "project": self.metadata.project,
689
689
  "version": "v2",
690
690
  "parameters": self.spec.parameters,
691
- "graph": self.spec.graph.to_dict() if self.spec.graph else {},
691
+ "graph": self.spec.graph.to_dict(strip=True) if self.spec.graph else {},
692
692
  "load_mode": self.spec.load_mode,
693
693
  "functions": function_name_uri_map,
694
694
  "graph_initializer": self.spec.graph_initializer,
@@ -756,7 +756,7 @@ class ServingRuntime(RemoteRuntime):
756
756
  namespace=namespace,
757
757
  logger=logger,
758
758
  is_mock=True,
759
- monitoring_mock=track_models,
759
+ monitoring_mock=self.spec.track_models,
760
760
  )
761
761
 
762
762
  if workdir:
@@ -789,7 +789,7 @@ class Spark3Runtime(KubejobRuntime):
789
789
  Spark itself uses the spec.[executor|driver].cores parameter to set the parallelism of tasks and cores
790
790
  assigned to each task within the pod. This function sets the .cores parameters for the job executed.
791
791
 
792
- See https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/issues/581 for a discussion about those
792
+ See https://github.com/kubeflow/spark-operator/issues/581 for a discussion about those
793
793
  parameters and their meaning in Spark operator.
794
794
 
795
795
  :param executor_cores: Number of cores to use for executor (spark.executor.cores)
mlrun/secrets.py CHANGED
@@ -134,7 +134,7 @@ class SecretsStore:
134
134
  def k8s_env_variable_name_for_secret(secret_name):
135
135
  from mlrun.config import config
136
136
 
137
- return config.secret_stores.kubernetes.env_variable_prefix + secret_name.upper()
137
+ return config.secret_stores.kubernetes.env_variable_prefix + secret_name
138
138
 
139
139
  def get_k8s_secrets(self):
140
140
  for source in self._hidden_sources:
mlrun/serving/server.py CHANGED
@@ -44,6 +44,8 @@ from ..utils import get_caller_globals
44
44
  from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
45
45
  from .utils import event_id_key, event_path_key
46
46
 
47
+ DUMMY_STREAM = "dummy://"
48
+
47
49
 
48
50
  class _StreamContext:
49
51
  """Handles the stream context for the events stream process. Includes the configuration for the output stream
@@ -72,14 +74,20 @@ class _StreamContext:
72
74
  function_uri, config.default_project
73
75
  )
74
76
 
75
- self.stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
77
+ stream_args = parameters.get("stream_args", {})
78
+
79
+ if log_stream == DUMMY_STREAM:
80
+ # Dummy stream used for testing, see tests/serving/test_serving.py
81
+ self.stream_uri = DUMMY_STREAM
82
+ elif not stream_args.get("mock"): # if not a mock: `context.is_mock = True`
83
+ self.stream_uri = mlrun.model_monitoring.get_stream_path(
84
+ project=project
85
+ )
76
86
 
77
87
  if log_stream:
78
88
  # Update the stream path to the log stream value
79
89
  self.stream_uri = log_stream.format(project=project)
80
90
 
81
- stream_args = parameters.get("stream_args", {})
82
-
83
91
  self.output_stream = get_stream_pusher(self.stream_uri, **stream_args)
84
92
 
85
93
 
mlrun/serving/states.py CHANGED
@@ -31,6 +31,7 @@ import storey.utils
31
31
 
32
32
  import mlrun
33
33
  import mlrun.common.schemas as schemas
34
+ from mlrun.utils import logger
34
35
 
35
36
  from ..config import config
36
37
  from ..datastore import get_stream_pusher
@@ -49,6 +50,8 @@ path_splitter = "/"
49
50
  previous_step = "$prev"
50
51
  queue_class_names = [">>", "$queue"]
51
52
 
53
+ MAX_MODELS_PER_ROUTER = 5000
54
+
52
55
 
53
56
  class GraphError(Exception):
54
57
  """error in graph topology or configuration"""
@@ -86,8 +89,10 @@ _task_step_fields = [
86
89
  "endpoint_type",
87
90
  ]
88
91
 
89
-
90
- MAX_ALLOWED_STEPS = 4500
92
+ _default_fields_to_strip_from_step = [
93
+ "model_endpoint_creation_strategy",
94
+ "endpoint_type",
95
+ ]
91
96
 
92
97
 
93
98
  def new_remote_endpoint(
@@ -110,6 +115,7 @@ class BaseStep(ModelObj):
110
115
  kind = "BaseStep"
111
116
  default_shape = "ellipse"
112
117
  _dict_fields = ["kind", "comment", "after", "on_error"]
118
+ _default_fields_to_strip = _default_fields_to_strip_from_step
113
119
 
114
120
  def __init__(
115
121
  self,
@@ -319,6 +325,9 @@ class BaseStep(ModelObj):
319
325
  full_event: Optional[bool] = None,
320
326
  input_path: Optional[str] = None,
321
327
  result_path: Optional[str] = None,
328
+ model_endpoint_creation_strategy: Optional[
329
+ schemas.ModelEndpointCreationStrategy
330
+ ] = None,
322
331
  **class_args,
323
332
  ):
324
333
  """add a step right after this step and return the new step
@@ -346,6 +355,16 @@ class BaseStep(ModelObj):
346
355
  this require that the event body will behave like a dict, example:
347
356
  event: {"x": 5} , result_path="y" means the output of the step will be written
348
357
  to event["y"] resulting in {"x": 5, "y": <result>}
358
+ :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
359
+ * **overwrite**:
360
+ 1. If model endpoints with the same name exist, delete the `latest` one.
361
+ 2. Create a new model endpoint entry and set it as `latest`.
362
+ * **inplace** (default):
363
+ 1. If model endpoints with the same name exist, update the `latest` entry.
364
+ 2. Otherwise, create a new entry.
365
+ * **archive**:
366
+ 1. If model endpoints with the same name exist, preserve them.
367
+ 2. Create a new model endpoint with the same name and set it to `latest`.
349
368
  :param class_args: class init arguments
350
369
  """
351
370
  if hasattr(self, "steps"):
@@ -367,6 +386,7 @@ class BaseStep(ModelObj):
367
386
  input_path=input_path,
368
387
  result_path=result_path,
369
388
  class_args=class_args,
389
+ model_endpoint_creation_strategy=model_endpoint_creation_strategy,
370
390
  )
371
391
  step = parent._steps.update(name, step)
372
392
  step.set_parent(parent)
@@ -625,6 +645,19 @@ class TaskStep(BaseStep):
625
645
  raise exc
626
646
  return event
627
647
 
648
+ def to_dict(
649
+ self,
650
+ fields: Optional[list] = None,
651
+ exclude: Optional[list] = None,
652
+ strip: bool = False,
653
+ ) -> dict:
654
+ self.endpoint_type = (
655
+ self.endpoint_type.value
656
+ if isinstance(self.endpoint_type, schemas.EndpointType)
657
+ else self.endpoint_type
658
+ )
659
+ return super().to_dict(fields, exclude, strip)
660
+
628
661
 
629
662
  class MonitoringApplicationStep(TaskStep):
630
663
  """monitoring application execution step, runs users class code"""
@@ -755,7 +788,7 @@ class RouterStep(TaskStep):
755
788
  creation_strategy: schemas.ModelEndpointCreationStrategy = schemas.ModelEndpointCreationStrategy.INPLACE,
756
789
  **class_args,
757
790
  ):
758
- """add child route step or class to the router
791
+ """add child route step or class to the router, if key exists it will be updated
759
792
 
760
793
  :param key: unique name (and route path) for the child step
761
794
  :param route: child step object (Task, ..)
@@ -775,7 +808,13 @@ class RouterStep(TaskStep):
775
808
  2. Create a new model endpoint with the same name and set it to `latest`.
776
809
 
777
810
  """
778
-
811
+ if len(self.routes.keys()) >= MAX_MODELS_PER_ROUTER and key not in self.routes:
812
+ raise mlrun.errors.MLRunModelLimitExceededError(
813
+ f"Router cannot support more than {MAX_MODELS_PER_ROUTER} model endpoints. "
814
+ f"To add a new route, edit an existing one by passing the same key."
815
+ )
816
+ if key in self.routes:
817
+ logger.info(f"Model {key} already exists, updating it.")
779
818
  if not route and not class_name and not handler:
780
819
  raise MLRunInvalidArgumentError("route or class_name must be specified")
781
820
  if not route:
@@ -790,10 +829,6 @@ class RouterStep(TaskStep):
790
829
  )
791
830
  route.function = function or route.function
792
831
 
793
- if len(self._routes) >= MAX_ALLOWED_STEPS:
794
- raise mlrun.errors.MLRunInvalidArgumentError(
795
- f"Cannot create the serving graph: the maximum number of steps is {MAX_ALLOWED_STEPS}"
796
- )
797
832
  route = self._routes.update(key, route)
798
833
  route.set_parent(self)
799
834
  return route
@@ -806,6 +841,10 @@ class RouterStep(TaskStep):
806
841
  del self._routes[key]
807
842
 
808
843
  def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
844
+ if not self.routes:
845
+ raise mlrun.errors.MLRunRuntimeError(
846
+ "You have to add models to the router step before initializing it"
847
+ )
809
848
  if not self._is_local_function(context):
810
849
  return
811
850
 
@@ -1010,6 +1049,9 @@ class QueueStep(BaseStep):
1010
1049
  full_event: Optional[bool] = None,
1011
1050
  input_path: Optional[str] = None,
1012
1051
  result_path: Optional[str] = None,
1052
+ model_endpoint_creation_strategy: Optional[
1053
+ schemas.ModelEndpointCreationStrategy
1054
+ ] = None,
1013
1055
  **class_args,
1014
1056
  ):
1015
1057
  if not function:
@@ -1026,6 +1068,7 @@ class QueueStep(BaseStep):
1026
1068
  full_event,
1027
1069
  input_path,
1028
1070
  result_path,
1071
+ model_endpoint_creation_strategy,
1029
1072
  **class_args,
1030
1073
  )
1031
1074
 
@@ -1104,6 +1147,9 @@ class FlowStep(BaseStep):
1104
1147
  full_event: Optional[bool] = None,
1105
1148
  input_path: Optional[str] = None,
1106
1149
  result_path: Optional[str] = None,
1150
+ model_endpoint_creation_strategy: Optional[
1151
+ schemas.ModelEndpointCreationStrategy
1152
+ ] = None,
1107
1153
  **class_args,
1108
1154
  ):
1109
1155
  """add task, queue or router step/class to the flow
@@ -1135,6 +1181,16 @@ class FlowStep(BaseStep):
1135
1181
  this require that the event body will behave like a dict, example:
1136
1182
  event: {"x": 5} , result_path="y" means the output of the step will be written
1137
1183
  to event["y"] resulting in {"x": 5, "y": <result>}
1184
+ :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
1185
+ * **overwrite**:
1186
+ 1. If model endpoints with the same name exist, delete the `latest` one.
1187
+ 2. Create a new model endpoint entry and set it as `latest`.
1188
+ * **inplace** (default):
1189
+ 1. If model endpoints with the same name exist, update the `latest` entry.
1190
+ 2. Otherwise, create a new entry.
1191
+ * **archive**:
1192
+ 1. If model endpoints with the same name exist, preserve them.
1193
+ 2. Create a new model endpoint with the same name and set it to `latest`.
1138
1194
  :param class_args: class init arguments
1139
1195
  """
1140
1196
 
@@ -1147,6 +1203,7 @@ class FlowStep(BaseStep):
1147
1203
  full_event=full_event,
1148
1204
  input_path=input_path,
1149
1205
  result_path=result_path,
1206
+ model_endpoint_creation_strategy=model_endpoint_creation_strategy,
1150
1207
  class_args=class_args,
1151
1208
  )
1152
1209
 
@@ -149,16 +149,24 @@ class V2ModelServer(StepToDict):
149
149
  if not self.context.is_mock and not self.model_spec:
150
150
  self.get_model()
151
151
  if not self.context.is_mock or self.context.monitoring_mock:
152
- self.model_endpoint = mlrun.get_run_db().get_model_endpoint(
153
- project=server.project,
154
- name=self.name,
155
- function_name=server.function_name,
156
- function_tag=server.function_tag or "latest",
157
- )
158
- self.model_endpoint_uid = self.model_endpoint.metadata.uid
152
+ try:
153
+ self.model_endpoint = mlrun.get_run_db().get_model_endpoint(
154
+ project=server.project,
155
+ name=self.name,
156
+ function_name=server.function_name,
157
+ function_tag=server.function_tag or "latest",
158
+ )
159
+ self.model_endpoint_uid = self.model_endpoint.metadata.uid
160
+ except mlrun.errors.MLRunNotFoundError:
161
+ logger.info(
162
+ "Model Endpoint not found for this step we will not monitor this model",
163
+ function_name=server.function_name,
164
+ name=self.name,
165
+ )
166
+ self.model_endpoint, self.model_endpoint_uid = None, None
159
167
  self._model_logger = (
160
168
  _ModelLogPusher(self, self.context)
161
- if self.context and self.context.stream.enabled
169
+ if self.context and self.context.stream.enabled and self.model_endpoint_uid
162
170
  else None
163
171
  )
164
172