mlrun 1.10.0rc25__py3-none-any.whl → 1.10.0rc26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (37)
  1. mlrun/artifacts/llm_prompt.py +8 -1
  2. mlrun/common/model_monitoring/helpers.py +86 -0
  3. mlrun/common/schemas/hub.py +11 -18
  4. mlrun/config.py +2 -3
  5. mlrun/datastore/__init__.py +2 -2
  6. mlrun/datastore/datastore_profile.py +27 -3
  7. mlrun/datastore/model_provider/huggingface_provider.py +5 -1
  8. mlrun/datastore/model_provider/model_provider.py +1 -1
  9. mlrun/datastore/s3.py +24 -2
  10. mlrun/datastore/storeytargets.py +2 -3
  11. mlrun/db/base.py +14 -0
  12. mlrun/db/httpdb.py +11 -2
  13. mlrun/db/nopdb.py +13 -0
  14. mlrun/k8s_utils.py +0 -14
  15. mlrun/model_monitoring/applications/base.py +15 -0
  16. mlrun/model_monitoring/controller.py +5 -3
  17. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
  18. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  19. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
  20. mlrun/model_monitoring/helpers.py +5 -5
  21. mlrun/projects/project.py +5 -5
  22. mlrun/runtimes/base.py +0 -3
  23. mlrun/runtimes/mounts.py +15 -2
  24. mlrun/runtimes/pod.py +153 -11
  25. mlrun/serving/routers.py +23 -41
  26. mlrun/serving/server.py +1 -0
  27. mlrun/serving/states.py +3 -3
  28. mlrun/serving/system_steps.py +52 -29
  29. mlrun/serving/v2_serving.py +9 -10
  30. mlrun/utils/helpers.py +5 -2
  31. mlrun/utils/version/version.json +2 -2
  32. {mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc26.dist-info}/METADATA +3 -3
  33. {mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc26.dist-info}/RECORD +37 -37
  34. {mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc26.dist-info}/WHEEL +0 -0
  35. {mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc26.dist-info}/entry_points.txt +0 -0
  36. {mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc26.dist-info}/licenses/LICENSE +0 -0
  37. {mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc26.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED
@@ -973,6 +973,9 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> dict[str, float]:
+        if not endpoint_ids:
+            return {}
+
         # Get the last request timestamp for each endpoint from the KV table.
         # The result of the query is a list of dictionaries,
         # each dictionary contains the endpoint id and the last request timestamp.
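Editor's note: the new guard short-circuits the per-endpoint lookup when the caller passes no endpoint ids, so no query with an empty filter ever reaches the backend. A minimal standalone sketch of the pattern (hypothetical function and names, not the connector's actual query code):

    from datetime import datetime
    from typing import Optional


    def last_request_per_endpoint(
        endpoint_ids: list[str],
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
    ) -> dict[str, float]:
        # Short-circuit: an empty filter would otherwise cost a round trip
        # (or, on some backends, match every endpoint).
        if not endpoint_ids:
            return {}
        # ... query the KV table here; placeholder result for the sketch ...
        return {endpoint_id: 0.0 for endpoint_id in endpoint_ids}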
mlrun/model_monitoring/helpers.py CHANGED
@@ -143,7 +143,7 @@ def get_stream_path(
         return stream_uri.replace("v3io://", f"ds://{profile.name}")

     elif isinstance(
-        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
     ):
         topic = mlrun.common.model_monitoring.helpers.get_kafka_topic(
             project=project, function_name=function_name
@@ -152,7 +152,7 @@ def get_stream_path(
     else:
         raise mlrun.errors.MLRunValueError(
             f"Received an unexpected stream profile type: {type(profile)}\n"
-            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
         )


@@ -300,7 +300,7 @@ def _get_v3io_output_stream(

 def _get_kafka_output_stream(
     *,
-    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource,
+    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream,
     project: str,
     function_name: str,
     mock: bool = False,
@@ -356,7 +356,7 @@ def get_output_stream(
         )

     elif isinstance(
-        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
     ):
         return _get_kafka_output_stream(
             kafka_profile=profile,
@@ -368,7 +368,7 @@ def get_output_stream(
     else:
         raise mlrun.errors.MLRunValueError(
             f"Received an unexpected stream profile type: {type(profile)}\n"
-            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
         )

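Editor's note: the five hunks above are one mechanical rename, DatastoreProfileKafkaSource to DatastoreProfileKafkaStream. Downstream code that constructed the old class needs the matching one-line change; a sketch, assuming the constructor keywords are otherwise unchanged (the docstring hunks below suggest they are):

    # Before (1.10.0rc25):
    # from mlrun.datastore.datastore_profile import DatastoreProfileKafkaSource
    # profile = DatastoreProfileKafkaSource(name="my-kafka", brokers=["broker:9092"], topics=[])

    # After (1.10.0rc26):
    from mlrun.datastore.datastore_profile import DatastoreProfileKafkaStream

    profile = DatastoreProfileKafkaStream(name="my-kafka", brokers=["broker:9092"], topics=[])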
mlrun/projects/project.py CHANGED
@@ -3816,7 +3816,7 @@ class MlrunProject(ModelObj):
3816
3816
 
3817
3817
  import mlrun
3818
3818
  from mlrun.datastore.datastore_profile import (
3819
- DatastoreProfileKafkaSource,
3819
+ DatastoreProfileKafkaStream,
3820
3820
  DatastoreProfileTDEngine,
3821
3821
  )
3822
3822
 
@@ -3833,7 +3833,7 @@ class MlrunProject(ModelObj):
3833
3833
  project.register_datastore_profile(tsdb_profile)
3834
3834
 
3835
3835
  # Create and register stream profile
3836
- stream_profile = DatastoreProfileKafkaSource(
3836
+ stream_profile = DatastoreProfileKafkaStream(
3837
3837
  name="my-kafka",
3838
3838
  brokers=["<kafka-broker-ip-address>:9094"],
3839
3839
  topics=[], # Keep the topics list empty
@@ -3875,9 +3875,9 @@ class MlrunProject(ModelObj):
3875
3875
 
3876
3876
  .. code-block:: python
3877
3877
 
3878
- from mlrun.datastore.datastore_profile import DatastoreProfileKafkaSource
3878
+ from mlrun.datastore.datastore_profile import DatastoreProfileKafkaStream
3879
3879
 
3880
- stream_profile = DatastoreProfileKafkaSource(
3880
+ stream_profile = DatastoreProfileKafkaStream(
3881
3881
  name="confluent-kafka",
3882
3882
  brokers=["<server-domain-start>.confluent.cloud:9092"],
3883
3883
  topics=[],
@@ -3906,7 +3906,7 @@ class MlrunProject(ModelObj):
3906
3906
  The supported profiles are:
3907
3907
 
3908
3908
  * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
3909
- * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource`
3909
+ * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream`
3910
3910
 
3911
3911
  You need to register one of them, and pass the profile's name.
3912
3912
  :param replace_creds: If ``True`` - override the existing credentials.
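Editor's note: for orientation, the registration flow from the updated docstring, condensed into one runnable sketch (project name and broker address are placeholders):

    import mlrun
    from mlrun.datastore.datastore_profile import DatastoreProfileKafkaStream

    project = mlrun.get_or_create_project("my-project")

    # Keep `topics` empty: mlrun derives the topic name per project/function
    # (see get_kafka_topic in the model-monitoring helpers hunk above).
    stream_profile = DatastoreProfileKafkaStream(
        name="my-kafka",
        brokers=["<kafka-broker-ip-address>:9094"],
        topics=[],
    )
    project.register_datastore_profile(stream_profile)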
mlrun/runtimes/base.py CHANGED
@@ -142,9 +142,6 @@ class FunctionSpec(ModelObj):
     def build(self, build):
         self._build = self._verify_dict(build, "build", ImageBuilder)

-    def enrich_function_preemption_spec(self):
-        pass
-
     def validate_service_account(self, allowed_service_accounts):
         pass

mlrun/runtimes/mounts.py CHANGED
@@ -14,6 +14,7 @@

 import os
 import typing
+import warnings
 from collections import namedtuple

 from mlrun.config import config
@@ -247,10 +248,22 @@ def mount_s3(
     def _use_s3_cred(runtime: "KubeResource"):
         _access_key = aws_access_key or os.environ.get(prefix + "AWS_ACCESS_KEY_ID")
         _secret_key = aws_secret_key or os.environ.get(prefix + "AWS_SECRET_ACCESS_KEY")
-        _endpoint_url = endpoint_url or os.environ.get(prefix + "S3_ENDPOINT_URL")
+
+        # Check for endpoint URL with backward compatibility
+        _endpoint_url = endpoint_url or os.environ.get(prefix + "AWS_ENDPOINT_URL_S3")
+        if not _endpoint_url:
+            # Check for deprecated environment variable
+            _endpoint_url = os.environ.get(prefix + "S3_ENDPOINT_URL")
+            if _endpoint_url:
+                warnings.warn(
+                    "S3_ENDPOINT_URL is deprecated in 1.10.0 and will be removed in 1.12.0, "
+                    "use AWS_ENDPOINT_URL_S3 instead.",
+                    # TODO: Remove this in 1.12.0
+                    FutureWarning,
+                )

         if _endpoint_url:
-            runtime.set_env(prefix + "S3_ENDPOINT_URL", _endpoint_url)
+            runtime.set_env(prefix + "AWS_ENDPOINT_URL_S3", _endpoint_url)
         if aws_region:
             runtime.set_env(prefix + "AWS_REGION", aws_region)
         if non_anonymous:
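Editor's note: the practical upshot is that functions mounted with mount_s3 now receive AWS_ENDPOINT_URL_S3 (the AWS SDK's standard variable) instead of the mlrun-specific S3_ENDPOINT_URL, which keeps working until 1.12.0 but emits a FutureWarning. A usage sketch (endpoint is a placeholder; the import path is assumed from this file's location):

    import os

    import mlrun
    from mlrun.runtimes.mounts import mount_s3

    # Preferred from 1.10.0 on:
    os.environ["AWS_ENDPOINT_URL_S3"] = "https://minio.example.com:9000"
    # Deprecated but still honored until 1.12.0 (emits FutureWarning):
    # os.environ["S3_ENDPOINT_URL"] = "https://minio.example.com:9000"

    fn = mlrun.code_to_function("trainer", kind="job", filename="trainer.py")
    fn.apply(mount_s3(non_anonymous=True))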
mlrun/runtimes/pod.py CHANGED
@@ -17,6 +17,7 @@ import os
 import re
 import time
 import typing
+import warnings
 from collections.abc import Iterable
 from enum import Enum

@@ -35,6 +36,7 @@ from mlrun.common.schemas import (

 from ..config import config as mlconf
 from ..k8s_utils import (
+    generate_preemptible_nodes_affinity_terms,
     validate_node_selectors,
 )
 from ..utils import logger, update_in
@@ -874,6 +876,133 @@ class KubeResource(BaseRuntime):
         """
         self.spec.with_requests(mem, cpu, patch=patch)

+    @staticmethod
+    def detect_preemptible_node_selector(node_selector: dict[str, str]) -> list[str]:
+        """
+        Check whether any provided node selector matches preemptible selectors.
+
+        :param node_selector: User-provided node selector mapping.
+        :return: List of `"key='value'"` strings that match a preemptible selector.
+        """
+        preemptible_node_selector = mlconf.get_preemptible_node_selector()
+
+        return [
+            f"'{key}': '{val}'"
+            for key, val in node_selector.items()
+            if preemptible_node_selector.get(key) == val
+        ]
+
+    def detect_preemptible_tolerations(
+        self, tolerations: list[k8s_client.V1Toleration]
+    ) -> list[str]:
+        """
+        Check whether any provided toleration matches preemptible tolerations.
+
+        :param tolerations: User-provided tolerations.
+        :return: List of formatted toleration strings that are considered preemptible.
+        """
+        preemptible_tolerations = [
+            k8s_client.V1Toleration(
+                key=toleration.get("key"),
+                value=toleration.get("value"),
+                effect=toleration.get("effect"),
+            )
+            for toleration in mlconf.get_preemptible_tolerations()
+        ]
+
+        def _format_toleration(toleration):
+            return f"'{toleration.key}'='{toleration.value}' (effect: '{toleration.effect}')"
+
+        return [
+            _format_toleration(toleration)
+            for toleration in tolerations
+            if toleration in preemptible_tolerations
+        ]
+
+    def detect_preemptible_affinity(self, affinity: k8s_client.V1Affinity) -> list[str]:
+        """
+        Check whether any provided affinity rules match preemptible affinity configs.
+
+        :param affinity: User-provided affinity object.
+        :return: List of formatted expressions that overlap with preemptible terms.
+        """
+        preemptible_affinity_terms = generate_preemptible_nodes_affinity_terms()
+        conflicting_affinities = []
+
+        if (
+            affinity
+            and affinity.node_affinity
+            and affinity.node_affinity.required_during_scheduling_ignored_during_execution
+        ):
+            user_terms = affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms
+            for user_term in user_terms:
+                user_expressions = {
+                    (expr.key, expr.operator, tuple(expr.values or []))
+                    for expr in user_term.match_expressions or []
+                }
+
+                for preemptible_term in preemptible_affinity_terms:
+                    preemptible_expressions = {
+                        (expr.key, expr.operator, tuple(expr.values or []))
+                        for expr in preemptible_term.match_expressions or []
+                    }
+
+                    # Ensure operators match and preemptible expressions are present
+                    common_exprs = user_expressions & preemptible_expressions
+                    if common_exprs:
+                        formatted = ", ".join(
+                            f"'{key} {operator} {list(values)}'"
+                            for key, operator, values in common_exprs
+                        )
+                        conflicting_affinities.append(formatted)
+        return conflicting_affinities
+
+    def raise_preemptible_warning(
+        self,
+        node_selector: typing.Optional[dict[str, str]],
+        tolerations: typing.Optional[list[k8s_client.V1Toleration]],
+        affinity: typing.Optional[k8s_client.V1Affinity],
+    ) -> None:
+        """
+        Detect conflicts and emit a single consolidated warning if needed.
+
+        :param node_selector: User-provided node selector.
+        :param tolerations: User-provided tolerations.
+        :param affinity: User-provided affinity.
+        :warns: PreemptionWarning - Emitted when any of the provided selectors,
+            tolerations, or affinity terms match the configured preemptible
+            settings. The message lists the conflicting items.
+        """
+        conflict_messages = []
+
+        if node_selector:
+            ns_conflicts = ", ".join(
+                self.detect_preemptible_node_selector(node_selector)
+            )
+            if ns_conflicts:
+                conflict_messages.append(f"Node selectors: {ns_conflicts}")
+
+        if tolerations:
+            tol_conflicts = ", ".join(self.detect_preemptible_tolerations(tolerations))
+            if tol_conflicts:
+                conflict_messages.append(f"Tolerations: {tol_conflicts}")
+
+        if affinity:
+            affinity_conflicts = ", ".join(self.detect_preemptible_affinity(affinity))
+            if affinity_conflicts:
+                conflict_messages.append(f"Affinity: {affinity_conflicts}")
+
+        if conflict_messages:
+            warning_components = "; \n".join(conflict_messages)
+            warnings.warn(
+                f"Warning: based on MLRun's preemptible node configuration, the following components \n"
+                f"may be removed or adjusted at runtime:\n"
+                f"{warning_components}.\n"
+                "This adjustment depends on the function's preemption mode. \n"
+                "The list of potential adjusted preemptible selectors can be viewed here: "
+                "mlrun.mlconf.get_preemptible_node_selector() and mlrun.mlconf.get_preemptible_tolerations()."
+            )
+
     def with_node_selection(
         self,
         node_name: typing.Optional[str] = None,
@@ -882,18 +1011,26 @@ class KubeResource(BaseRuntime):
         tolerations: typing.Optional[list[k8s_client.V1Toleration]] = None,
     ):
         """
-        Enables to control on which k8s node the job will run
-
-        :param node_name: The name of the k8s node
-        :param node_selector: Label selector, only nodes with matching labels will be eligible to be picked
-        :param affinity: Expands the types of constraints you can express - see
-            https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
-            for details
-        :param tolerations: Tolerations are applied to pods, and allow (but do not require) the pods to schedule
-            onto nodes with matching taints - see
-            https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration
-            for details
+        Configure Kubernetes node scheduling for this function.
+
+        Updates one or more scheduling hints: exact node pinning, label-based selection,
+        affinity/anti-affinity rules, and taint tolerations. Passing ``None`` leaves the
+        current value unchanged; pass an empty dict/list (e.g., ``{}``, ``[]``) to clear.

+        :param node_name: Exact Kubernetes node name to pin the pod to.
+        :param node_selector: Mapping of label selectors. Use ``{}`` to clear.
+        :param affinity: :class:`kubernetes.client.V1Affinity` constraints.
+        :param tolerations: List of :class:`kubernetes.client.V1Toleration`. Use ``[]`` to clear.
+        :warns: PreemptionWarning - Emitted if provided selectors/tolerations/affinity
+            conflict with the function's preemption mode.
+
+        Example usage:
+            Prefer a GPU pool and allow scheduling on spot nodes::
+
+                job.with_node_selection(
+                    node_selector={"nodepool": "gpu"},
+                    tolerations=[k8s_client.V1Toleration(key="spot", operator="Exists")],
+                )
         """
         if node_name:
             self.spec.node_name = node_name
@@ -904,6 +1041,11 @@ class KubeResource(BaseRuntime):
             self.spec.affinity = affinity
         if tolerations is not None:
             self.spec.tolerations = tolerations
+        self.raise_preemptible_warning(
+            node_selector=self.spec.node_selector,
+            tolerations=self.spec.tolerations,
+            affinity=self.spec.affinity,
+        )

     def with_priority_class(self, name: typing.Optional[str] = None):
         """
mlrun/serving/routers.py CHANGED
@@ -31,6 +31,9 @@ import mlrun.common.model_monitoring
 import mlrun.common.schemas.model_monitoring
 from mlrun.utils import logger, now_date

+from ..common.model_monitoring.helpers import (
+    get_model_endpoints_creation_task_status,
+)
 from .utils import RouterToDict, _extract_input_data, _update_result_body
 from .v2_serving import _ModelLogPusher

@@ -171,46 +174,6 @@ class BaseModelRouter(RouterToDict):
         """run tasks after processing the event"""
         return event

-    def _get_background_task_status(
-        self,
-    ) -> mlrun.common.schemas.BackgroundTaskState:
-        self._background_task_check_timestamp = now_date()
-        server: mlrun.serving.GraphServer = getattr(
-            self.context, "_server", None
-        ) or getattr(self.context, "server", None)
-        if not self.context.is_mock:
-            if server.model_endpoint_creation_task_name:
-                background_task = mlrun.get_run_db().get_project_background_task(
-                    server.project, server.model_endpoint_creation_task_name
-                )
-                logger.debug(
-                    "Checking model endpoint creation task status",
-                    task_name=server.model_endpoint_creation_task_name,
-                )
-                if (
-                    background_task.status.state
-                    in mlrun.common.schemas.BackgroundTaskState.terminal_states()
-                ):
-                    logger.info(
-                        f"Model endpoint creation task completed with state {background_task.status.state}"
-                    )
-                else:  # in progress
-                    logger.info(
-                        f"Model endpoint creation task is still in progress with the current state: "
-                        f"{background_task.status.state}. Events will not be monitored for the next "
-                        f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
-                        name=self.name,
-                        background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
-                    )
-                return background_task.status.state
-            else:
-                logger.error(
-                    "Model endpoint creation task name not provided. This function is not being monitored.",
-                )
-        elif self.context.monitoring_mock:
-            return mlrun.common.schemas.BackgroundTaskState.succeeded
-        return mlrun.common.schemas.BackgroundTaskState.failed
-
     def _update_background_task_state(self, event):
         if not self.background_task_reached_terminal_state and (
             self._background_task_check_timestamp is None
@@ -219,7 +182,26 @@ class BaseModelRouter(RouterToDict):
                 seconds=mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period
             )
         ):
-            self._background_task_current_state = self._get_background_task_status()
+            server: mlrun.serving.GraphServer = getattr(
+                self.context, "_server", None
+            ) or getattr(self.context, "server", None)
+            if not self.context.is_mock:
+                (
+                    self._background_task_current_state,
+                    self._background_task_check_timestamp,
+                    _,
+                ) = get_model_endpoints_creation_task_status(server)
+            elif self.context.monitoring_mock:
+                self._background_task_current_state = (
+                    mlrun.common.schemas.BackgroundTaskState.succeeded
+                )
+                self._background_task_check_timestamp = mlrun.utils.now_date()
+            else:
+                self._background_task_current_state = (
+                    mlrun.common.schemas.BackgroundTaskState.failed
+                )
+                self._background_task_check_timestamp = mlrun.utils.now_date()
+
         if event.body:
             event.body["background_task_state"] = (
                 self._background_task_current_state
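Editor's note: the polling logic removed here (and from system_steps.py and v2_serving.py below) moves into mlrun/common/model_monitoring/helpers.py, the +86-line file listed at the top of this diff but not shown. Judging strictly from the call sites, the helper returns a (state, check-timestamp, listed-endpoint-uids) tuple; a usage sketch with an inferred, unconfirmed signature:

    import mlrun
    from mlrun.common.model_monitoring.helpers import (
        get_model_endpoints_creation_task_status,
    )


    def creation_task_succeeded(server: "mlrun.serving.GraphServer") -> bool:
        # The 3-tuple shape is inferred from the three call sites in this diff.
        state, checked_at, listed_uids = get_model_endpoints_creation_task_status(server)
        return state == mlrun.common.schemas.BackgroundTaskState.succeeded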
mlrun/serving/server.py CHANGED
@@ -417,6 +417,7 @@ def add_monitoring_general_steps(
         "mlrun.serving.system_steps.BackgroundTaskStatus",
         "background_task_status_step",
         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        full_event=True,
     )
     monitor_flow_step = graph.add_step(
         "storey.Filter",
mlrun/serving/states.py CHANGED
@@ -39,7 +39,7 @@ import mlrun.common.schemas as schemas
 from mlrun.artifacts.llm_prompt import LLMPromptArtifact, PlaceholderDefaultDict
 from mlrun.artifacts.model import ModelArtifact
 from mlrun.datastore.datastore_profile import (
-    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaStream,
     DatastoreProfileKafkaTarget,
     DatastoreProfileV3io,
     datastore_profile_read,
@@ -3398,7 +3398,7 @@ def _init_async_objects(context, steps):
             datastore_profile = datastore_profile_read(stream_path)
             if isinstance(
                 datastore_profile,
-                (DatastoreProfileKafkaTarget, DatastoreProfileKafkaSource),
+                (DatastoreProfileKafkaTarget, DatastoreProfileKafkaStream),
             ):
                 step._async_object = KafkaStoreyTarget(
                     path=stream_path,
@@ -3414,7 +3414,7 @@ def _init_async_objects(context, steps):
             else:
                 raise mlrun.errors.MLRunValueError(
                     f"Received an unexpected stream profile type: {type(datastore_profile)}\n"
-                    "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+                    "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
                 )
         elif stream_path.startswith("kafka://") or kafka_brokers:
             topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
mlrun/serving/system_steps.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import random
 from copy import copy
 from datetime import timedelta
@@ -25,10 +24,27 @@ import mlrun.artifacts
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.feature_store
 import mlrun.serving
+from mlrun.common.model_monitoring.helpers import (
+    get_model_endpoints_creation_task_status,
+)
 from mlrun.common.schemas import MonitoringData
 from mlrun.utils import get_data_from_path, logger


+class MatchingEndpointsState(mlrun.common.types.StrEnum):
+    all_matched = "all_matched"
+    not_all_matched = "not_all_matched"
+    no_check_needed = "no_check_needed"
+    not_yet_checked = "not_yet_matched"
+
+    @staticmethod
+    def success_states() -> list[str]:
+        return [
+            MatchingEndpointsState.all_matched,
+            MatchingEndpointsState.no_check_needed,
+        ]
+
+
 class MonitoringPreProcessor(storey.MapClass):
     """preprocess step, reconstructs the serving output event body to StreamProcessingEvent schema"""

@@ -317,6 +333,9 @@ class BackgroundTaskStatus(storey.MapClass):

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
+        self.matching_endpoints = MatchingEndpointsState.not_yet_checked
+        self.graph_model_endpoint_uids: set = set()
+        self.listed_model_endpoint_uids: set = set()
         self.server: mlrun.serving.GraphServer = (
             getattr(self.context, "server", None) if self.context else None
         )
@@ -337,43 +356,47 @@ class BackgroundTaskStatus(storey.MapClass):
                 )
             )
         ):
-            background_task = mlrun.get_run_db().get_project_background_task(
-                self.server.project, self.server.model_endpoint_creation_task_name
-            )
-            self._background_task_check_timestamp = mlrun.utils.now_date()
-            self._log_background_task_state(background_task.status.state)
-            self._background_task_state = background_task.status.state
+            (
+                self._background_task_state,
+                self._background_task_check_timestamp,
+                self.listed_model_endpoint_uids,
+            ) = get_model_endpoints_creation_task_status(self.server)
+            if (
+                self.listed_model_endpoint_uids
+                and self.matching_endpoints == MatchingEndpointsState.not_yet_checked
+            ):
+                if not self.graph_model_endpoint_uids:
+                    self.graph_model_endpoint_uids = collect_model_endpoint_uids(
+                        self.server
+                    )
+
+                if self.graph_model_endpoint_uids.issubset(
+                    self.listed_model_endpoint_uids
+                ):
+                    self.matching_endpoints = MatchingEndpointsState.all_matched
+            elif self.listed_model_endpoint_uids is None:
+                self.matching_endpoints = MatchingEndpointsState.no_check_needed

         if (
             self._background_task_state
             == mlrun.common.schemas.BackgroundTaskState.succeeded
+            and self.matching_endpoints in MatchingEndpointsState.success_states()
         ):
             return event
         else:
             return None

-    def _log_background_task_state(
-        self, background_task_state: mlrun.common.schemas.BackgroundTaskState
-    ):
-        logger.info(
-            "Checking model endpoint creation task status",
-            task_name=self.server.model_endpoint_creation_task_name,
-        )
-        if (
-            background_task_state
-            in mlrun.common.schemas.BackgroundTaskState.terminal_states()
-        ):
-            logger.info(
-                f"Model endpoint creation task completed with state {background_task_state}"
-            )
-        else:  # in progress
-            logger.info(
-                f"Model endpoint creation task is still in progress with the current state: "
-                f"{background_task_state}. Events will not be monitored for the next "
-                f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
-                name=self.name,
-                background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
-            )
+
+def collect_model_endpoint_uids(server: mlrun.serving.GraphServer) -> set[str]:
+    """Collects all model endpoint UIDs from the server's graph steps."""
+    model_endpoint_uids = set()
+    for step in server.graph.steps.values():
+        if hasattr(step, "monitoring_data"):
+            for model in step.monitoring_data.keys():
+                uid = step.monitoring_data[model].get(
+                    mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
+                )
+                if uid:
+                    model_endpoint_uids.add(uid)
    return model_endpoint_uids


 class SamplingStep(storey.MapClass):
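Editor's note: the gate's decision is now two-fold, the creation task must have succeeded and every endpoint uid wired into the graph must appear among the listed uids (or the check must be waived). A standalone restatement of that decision table (hypothetical helper, mirroring the logic above):

    def should_forward_events(
        task_succeeded: bool,
        graph_uids: set[str],
        listed_uids: "set[str] | None",
    ) -> bool:
        # listed_uids is None when the status helper decides no uid check is needed.
        if listed_uids is None:
            endpoints_ok = True  # MatchingEndpointsState.no_check_needed
        else:
            endpoints_ok = graph_uids.issubset(listed_uids)  # all_matched?
        return task_succeeded and endpoints_ok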
mlrun/serving/v2_serving.py CHANGED
@@ -24,6 +24,9 @@ import mlrun.common.schemas.model_monitoring
 import mlrun.model_monitoring
 from mlrun.utils import logger, now_date

+from ..common.model_monitoring.helpers import (
+    get_model_endpoints_creation_task_status,
+)
 from .utils import StepToDict, _extract_input_data, _update_result_body


@@ -474,22 +477,18 @@ class V2ModelServer(StepToDict):
         ) or getattr(self.context, "server", None)
         if not self.context.is_mock or self.context.monitoring_mock:
             if server.model_endpoint_creation_task_name:
-                background_task = mlrun.get_run_db().get_project_background_task(
-                    server.project, server.model_endpoint_creation_task_name
-                )
-                logger.debug(
-                    "Checking model endpoint creation task status",
-                    task_name=server.model_endpoint_creation_task_name,
+                background_task_state, _, _ = get_model_endpoints_creation_task_status(
+                    server
                 )
                 if (
-                    background_task.status.state
+                    background_task_state
                     in mlrun.common.schemas.BackgroundTaskState.terminal_states()
                 ):
                     logger.debug(
-                        f"Model endpoint creation task completed with state {background_task.status.state}"
+                        f"Model endpoint creation task completed with state {background_task_state}"
                     )
                     if (
-                        background_task.status.state
+                        background_task_state
                         == mlrun.common.schemas.BackgroundTaskState.succeeded
                     ):
                         self._model_logger = (
@@ -504,7 +503,7 @@ class V2ModelServer(StepToDict):
                 else:  # in progress
                     logger.debug(
                         f"Model endpoint creation task is still in progress with the current state: "
-                        f"{background_task.status.state}.",
+                        f"{background_task_state}.",
                         name=self.name,
                     )
             else:
mlrun/utils/helpers.py CHANGED
@@ -804,7 +804,7 @@ def remove_tag_from_artifact_uri(uri: str) -> Optional[str]:

 def extend_hub_uri_if_needed(uri) -> tuple[str, bool]:
     """
-    Retrieve the full uri of the item's yaml in the hub.
+    Retrieve the full uri of the function's yaml in the hub.

     :param uri: structure: "hub://[<source>/]<item-name>[:<tag>]"

@@ -845,7 +845,10 @@ def extend_hub_uri_if_needed(uri) -> tuple[str, bool]:
     # hub function directory name are with underscores instead of hyphens
     name = name.replace("-", "_")
     function_suffix = f"{name}/{tag}/src/function.yaml"
-    return indexed_source.source.get_full_uri(function_suffix), is_hub_uri
+    function_type = mlrun.common.schemas.hub.HubSourceType.functions
+    return indexed_source.source.get_full_uri(
+        function_suffix, function_type
+    ), is_hub_uri


 def gen_md_table(header, rows=None):
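Editor's note: in behavior terms the function still expands hub:// URIs the same way; it now resolves them against the hub source's functions catalog explicitly via HubSourceType.functions. A quick sketch of the expansion (output shape indicative; the base URL comes from the configured hub source):

    from mlrun.utils.helpers import extend_hub_uri_if_needed

    full_uri, is_hub = extend_hub_uri_if_needed("hub://auto-trainer:latest")
    # is_hub -> True; full_uri ends with .../auto_trainer/latest/src/function.yaml
    # (hyphens in the item name become underscores in the directory name)

    full_uri, is_hub = extend_hub_uri_if_needed("s3://my-bucket/fn.yaml")
    # Non-hub URIs pass through unchanged; is_hub -> False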
mlrun/utils/version/version.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "git_commit": "93eae062e788304f5adff8f128f49db8f1f346af",
-  "version": "1.10.0-rc25"
+  "git_commit": "fc51af08faf3c93220de4d619b679cf1950ba5ed",
+  "version": "1.10.0-rc26"
 }