mlrun 1.10.0rc8__py3-none-any.whl → 1.10.0rc10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (44)
  1. mlrun/common/constants.py +1 -0
  2. mlrun/common/db/dialects.py +25 -0
  3. mlrun/common/schemas/__init__.py +1 -0
  4. mlrun/common/schemas/function.py +1 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +8 -0
  6. mlrun/common/schemas/partition.py +13 -3
  7. mlrun/common/schemas/workflow.py +7 -0
  8. mlrun/datastore/utils.py +0 -1
  9. mlrun/db/__init__.py +1 -0
  10. mlrun/db/base.py +17 -0
  11. mlrun/db/nopdb.py +9 -0
  12. mlrun/db/sql_types.py +160 -0
  13. mlrun/frameworks/tf_keras/mlrun_interface.py +4 -1
  14. mlrun/frameworks/tf_keras/model_handler.py +23 -3
  15. mlrun/launcher/base.py +0 -1
  16. mlrun/launcher/client.py +0 -1
  17. mlrun/launcher/local.py +0 -4
  18. mlrun/model_monitoring/applications/base.py +21 -1
  19. mlrun/model_monitoring/applications/context.py +2 -1
  20. mlrun/projects/__init__.py +1 -0
  21. mlrun/projects/pipelines.py +36 -0
  22. mlrun/projects/project.py +0 -13
  23. mlrun/runtimes/daskjob.py +0 -2
  24. mlrun/runtimes/kubejob.py +0 -4
  25. mlrun/runtimes/mpijob/abstract.py +0 -2
  26. mlrun/runtimes/mpijob/v1.py +0 -2
  27. mlrun/runtimes/nuclio/function.py +0 -2
  28. mlrun/runtimes/nuclio/serving.py +0 -46
  29. mlrun/runtimes/pod.py +0 -3
  30. mlrun/runtimes/remotesparkjob.py +0 -2
  31. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  32. mlrun/serving/routers.py +17 -13
  33. mlrun/serving/server.py +3 -97
  34. mlrun/serving/system_steps.py +2 -1
  35. mlrun/serving/v2_serving.py +2 -2
  36. mlrun/utils/helpers.py +1 -1
  37. mlrun/utils/version/version.json +2 -2
  38. {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/METADATA +15 -12
  39. {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/RECORD +43 -42
  40. {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/licenses/LICENSE +1 -1
  41. mlrun/common/db/sql_session.py +0 -79
  42. {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/WHEEL +0 -0
  43. {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/entry_points.txt +0 -0
  44. {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py CHANGED
@@ -76,6 +76,7 @@ class MLRunInternalLabels:
      kind = "kind"
      component = "component"
      mlrun_type = "mlrun__type"
+     original_workflow_id = "original-workflow-id"

      owner = "owner"
      v3io_user = "v3io_user"
mlrun/common/db/dialects.py ADDED
@@ -0,0 +1,25 @@
+ # Copyright 2025 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import mlrun.common.types
+
+
+ class Dialects(mlrun.common.types.StrEnum):
+     MYSQL = "mysql"
+     POSTGRESQL = "postgresql"
+     SQLITE = "sqlite"
+
+     @classmethod
+     def all(cls) -> list[str]:
+         """Return all dialects as a list of strings."""
+         return [dialect.value for dialect in cls]
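
For context, a minimal sketch (not part of the diff) of how such a dialect enum is typically checked against a live SQLAlchemy engine; the in-memory SQLite URL is only illustrative:

    import sqlalchemy

    from mlrun.common.db.dialects import Dialects

    engine = sqlalchemy.create_engine("sqlite:///:memory:")  # illustrative engine
    if engine.dialect.name == Dialects.SQLITE:
        print("running against SQLite")
    print(Dialects.all())  # ['mysql', 'postgresql', 'sqlite']
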
mlrun/common/schemas/__init__.py CHANGED
@@ -218,6 +218,7 @@ from .serving import ModelRunnerStepData, MonitoringData
  from .tag import Tag, TagObjects
  from .workflow import (
      GetWorkflowResponse,
+     RerunWorkflowRequest,
      WorkflowRequest,
      WorkflowResponse,
      WorkflowSpec,
mlrun/common/schemas/function.py CHANGED
@@ -47,6 +47,7 @@ class FunctionState:

      # for pipeline steps
      skipped = "skipped"
+     initialized = "initialized"

      @classmethod
      def get_function_state_from_pod_state(cls, pod_state: str):
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -15,7 +15,9 @@ import abc
  import json
  from datetime import datetime
  from typing import Any, NamedTuple, Optional, TypeVar
+ from uuid import UUID

+ from pydantic import validator  # use `validator` if you’re still on Pydantic v1
  from pydantic.v1 import BaseModel, Field, constr

  # TODO: remove the unused import below after `mlrun.datastore` and `mlrun.utils` usage is removed.
@@ -121,6 +123,12 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
      def mutable_fields(cls):
          return ["labels"]

+     @validator("uid", pre=True)
+     def _uid_to_str(cls, v):  # noqa: N805
+         if isinstance(v, UUID):
+             return str(v)
+         return v
+

  class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
      model_class: Optional[str] = ""
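
An illustrative sketch (assumed, not taken from the diff) of what the new `uid` validator buys: a `uuid.UUID` passed to the metadata model is coerced to its string form before validation.

    import uuid

    from mlrun.common.schemas.model_monitoring.model_endpoints import ModelEndpointMetadata

    # `project` and `name` are standard object-metadata fields; the values are placeholders
    meta = ModelEndpointMetadata(project="demo", name="my-endpoint", uid=uuid.uuid4())
    assert isinstance(meta.uid, str)
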
mlrun/common/schemas/partition.py CHANGED
@@ -14,10 +14,10 @@

  from datetime import datetime, timedelta

- from mlrun.common.types import StrEnum
+ import mlrun.common.types


- class PartitionInterval(StrEnum):
+ class PartitionInterval(mlrun.common.types.StrEnum):
      DAY = "DAY"
      MONTH = "MONTH"
      YEARWEEK = "YEARWEEK"
@@ -44,6 +44,8 @@ class PartitionInterval(StrEnum):
              return timedelta(days=30)
          elif self == PartitionInterval.YEARWEEK:
              return timedelta(weeks=1)
+         else:
+             raise ValueError(f"Unsupported PartitionInterval: {self}")

      @classmethod
      def from_expression(cls, partition_expression: str):
@@ -83,7 +85,7 @@ class PartitionInterval(StrEnum):
          current_datetime = start_datetime

          for _ in range(partition_number):
-             partition_name = self.get_partition_name(current_datetime)
+             partition_name = f"p{self.get_partition_name(current_datetime)}"
              partition_boundary_date = self.get_next_partition_time(current_datetime)
              partition_value = self.get_partition_name(partition_boundary_date)
              partitioning_information_list.append((partition_name, partition_value))
@@ -109,6 +111,8 @@ class PartitionInterval(StrEnum):
              return (current_datetime.replace(day=1) + timedelta(days=32)).replace(day=1)
          elif self == PartitionInterval.YEARWEEK:
              return current_datetime + timedelta(weeks=1)
+         else:
+             raise ValueError(f"Unsupported PartitionInterval: {self}")

      def get_partition_name(self, current_datetime: datetime) -> str:
          if self == PartitionInterval.DAY:
@@ -118,6 +122,8 @@ class PartitionInterval(StrEnum):
          elif self == PartitionInterval.YEARWEEK:
              year, week, _ = current_datetime.isocalendar()
              return f"{year}{week:02d}"
+         else:
+             raise ValueError(f"Unsupported PartitionInterval: {self}")

      def get_partition_expression(self, column_name: str):
          if self == PartitionInterval.YEARWEEK:
@@ -130,6 +136,8 @@ class PartitionInterval(StrEnum):
              # generates value in format %Y%m in mysql
              # mysql query example: `select YEAR(NOW())*100 + MONTH(NOW());`
              return f"YEAR({column_name}) * 100 + MONTH({column_name})"
+         else:
+             raise ValueError(f"Unsupported PartitionInterval: {self}")

      def get_number_of_partitions(self, days: int) -> int:
          # Calculate the number partitions based on given number of days
@@ -140,3 +148,5 @@ class PartitionInterval(StrEnum):
              return int(days / 30.44)
          elif self == PartitionInterval.YEARWEEK:
              return int(days / 7)
+         else:
+             raise ValueError(f"Unsupported PartitionInterval: {self}")
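
A hedged usage sketch of the partitioning helpers touched above (exact outputs follow the per-interval formatting rules shown in the diff):

    from datetime import datetime

    from mlrun.common.schemas.partition import PartitionInterval

    interval = PartitionInterval.MONTH
    print(interval.get_partition_name(datetime(2025, 3, 15)))  # a "%Y%m"-style value for monthly partitions
    print(interval.get_partition_expression("created"))        # "YEAR(created) * 100 + MONTH(created)"
    print(interval.get_number_of_partitions(days=365))         # number of ~30.44-day months covering 365 days
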
mlrun/common/schemas/workflow.py CHANGED
@@ -46,6 +46,13 @@ class WorkflowRequest(pydantic.v1.BaseModel):
      notifications: typing.Optional[list[Notification]] = None


+ class RerunWorkflowRequest(pydantic.v1.BaseModel):
+     run_name: typing.Optional[str] = (None,)
+     run_id: typing.Optional[str] = (None,)
+     notifications: typing.Optional[list[Notification]] = None
+     workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
+
+
  class WorkflowResponse(pydantic.v1.BaseModel):
      project: str = None
      name: str = None
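
A minimal construction sketch (values are placeholders); passing explicit arguments sidesteps the `(None,)` tuple defaults that appear in the diff:

    from mlrun.common.schemas import RerunWorkflowRequest

    request = RerunWorkflowRequest(
        run_name="nightly-training",
        run_id="1234",
        workflow_runner_node_selector={"kubernetes.io/arch": "amd64"},
    )
    print(request.dict())
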
mlrun/datastore/utils.py CHANGED
@@ -150,7 +150,6 @@ def _generate_sql_query_with_time_filter(
      table = sqlalchemy.Table(
          table_name,
          sqlalchemy.MetaData(),
-         autoload=True,
          autoload_with=engine,
      )
      query = sqlalchemy.select(table)
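
For background, SQLAlchemy 2.x removed the old `autoload=True` flag; `autoload_with=engine` alone triggers table reflection. A self-contained sketch (the table name is illustrative):

    import sqlalchemy

    engine = sqlalchemy.create_engine("sqlite:///:memory:")
    with engine.begin() as conn:
        conn.execute(sqlalchemy.text("CREATE TABLE events (id INTEGER PRIMARY KEY, ts TIMESTAMP)"))

    # reflection picks up the existing columns from the database
    table = sqlalchemy.Table("events", sqlalchemy.MetaData(), autoload_with=engine)
    print(table.columns.keys())  # ['id', 'ts']
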
mlrun/db/__init__.py CHANGED
@@ -14,6 +14,7 @@
  from os import environ

  from ..config import config
+ from . import sql_types
  from .base import RunDBError, RunDBInterface  # noqa

mlrun/db/base.py CHANGED
@@ -638,6 +638,16 @@ class RunDBInterface(ABC):
      ):
          pass

+     @abstractmethod
+     def retry_pipeline(
+         self,
+         run_id: str,
+         project: str,
+         namespace: Optional[str] = None,
+         timeout: int = 30,
+     ):
+         pass
+
      @abstractmethod
      def list_project_secrets(
          self,
@@ -1034,6 +1044,13 @@ class RunDBInterface(ABC):
      ):
          pass

+     def get_project_background_task(
+         self,
+         project: str,
+         name: str,
+     ) -> mlrun.common.schemas.BackgroundTask:
+         pass
+
      @abstractmethod
      def submit_workflow(
          self,
mlrun/db/nopdb.py CHANGED
@@ -524,6 +524,15 @@ class NopDB(RunDBInterface):
      ):
          pass

+     def retry_pipeline(
+         self,
+         run_id: str,
+         project: str,
+         namespace: Optional[str] = None,
+         timeout: int = 30,
+     ):
+         pass
+
      def list_pipelines(
          self,
          project: str,
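
A hedged usage sketch: concrete DB clients (such as the HTTP client returned by `mlrun.get_run_db()`) are expected to implement the new `retry_pipeline` signature; the run id and project below are placeholders:

    import mlrun

    db = mlrun.get_run_db()
    new_pipeline_id = db.retry_pipeline(run_id="<original-kfp-run-uid>", project="my-project")
    print("retried pipeline:", new_pipeline_id)
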
mlrun/db/sql_types.py ADDED
@@ -0,0 +1,160 @@
+ # Copyright 2025 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """
+ This module provides SQLAlchemy TypeDecorator subclasses that are aware of
+ database dialects (MySQL, PostgreSQL, SQLite) and automatically select
+ appropriate native types (e.g., UUID, BLOB, TIMESTAMP with precision) or
+ fallbacks (e.g., hex-string storage) to ensure consistent behavior across
+ different database backends.
+ """
+
+ import uuid
+ from typing import Any, Optional, Union
+
+ import sqlalchemy.types
+ from sqlalchemy import CHAR, Text
+ from sqlalchemy.dialects.mysql import DATETIME as MYSQL_DATETIME
+ from sqlalchemy.dialects.mysql import MEDIUMBLOB
+ from sqlalchemy.dialects.postgresql import BYTEA
+ from sqlalchemy.dialects.postgresql import TIMESTAMP as PG_TIMESTAMP
+ from sqlalchemy.dialects.postgresql import UUID as PG_UUID
+ from sqlalchemy.engine.interfaces import Dialect
+ from sqlalchemy.types import TypeDecorator
+
+ import mlrun.common.db.dialects
+
+
+ class DateTime(TypeDecorator):
+     impl = sqlalchemy.types.DateTime
+     cache_ok = True
+     precision: int = 3
+
+     def load_dialect_impl(
+         self,
+         dialect: Dialect,
+     ) -> sqlalchemy.types.TypeEngine:
+         if dialect.name == mlrun.common.db.dialects.Dialects.MYSQL:
+             return dialect.type_descriptor(
+                 MYSQL_DATETIME(
+                     fsp=self.precision,
+                     timezone=True,
+                 )
+             )
+         if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL:
+             return dialect.type_descriptor(
+                 PG_TIMESTAMP(
+                     precision=self.precision,
+                     timezone=True,
+                 )
+             )
+         return dialect.type_descriptor(sqlalchemy.types.DateTime)
+
+
+ class MicroSecondDateTime(DateTime):
+     cache_ok = True
+     precision: int = 6
+
+
+ class Blob(TypeDecorator):
+     impl = sqlalchemy.types.LargeBinary
+     cache_ok = True
+
+     def load_dialect_impl(
+         self,
+         dialect: Dialect,
+     ) -> sqlalchemy.types.TypeEngine:
+         if dialect.name == mlrun.common.db.dialects.Dialects.MYSQL:
+             return dialect.type_descriptor(MEDIUMBLOB)
+         if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL:
+             return dialect.type_descriptor(BYTEA)
+         return dialect.type_descriptor(self.impl)
+
+
+ class Utf8BinText(TypeDecorator):
+     impl = Text
+     cache_ok = True
+
+     def load_dialect_impl(
+         self,
+         dialect: Dialect,
+     ) -> sqlalchemy.types.TypeEngine:
+         if dialect.name == mlrun.common.db.dialects.Dialects.MYSQL:
+             return dialect.type_descriptor(
+                 sqlalchemy.dialects.mysql.VARCHAR(
+                     collation="utf8_bin",
+                     length=255,
+                 )
+             )
+         if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL:
+             # This collation is created as part of the database creation
+             return dialect.type_descriptor(
+                 Text(
+                     collation="utf8_bin",
+                 )
+             )
+         if dialect.name == mlrun.common.db.dialects.Dialects.SQLITE:
+             return dialect.type_descriptor(
+                 Text(
+                     collation="BINARY",
+                 )
+             )
+         return dialect.type_descriptor(self.impl)
+
+
+ class UuidType(TypeDecorator):
+     """
+     A UUID type which stores as native UUID on Postgres (as_uuid=True)
+     and as 32-char hex strings on other dialects.
+     """
+
+     impl = CHAR(32)
+     cache_ok = True
+
+     def load_dialect_impl(self, dialect: Dialect) -> sqlalchemy.types.TypeEngine:
+         if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL:
+             return dialect.type_descriptor(PG_UUID(as_uuid=True))
+         return dialect.type_descriptor(CHAR(32))
+
+     def process_bind_param(
+         self,
+         value: Optional[Union[uuid.UUID, str]],
+         dialect: Dialect,
+     ) -> Optional[Union[uuid.UUID, str]]:
+         if value is None:
+             return None
+         if isinstance(value, uuid.UUID):
+             return (
+                 value
+                 if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL
+                 else value.hex
+             )
+         if isinstance(value, str):
+             u = uuid.UUID(value)
+             return (
+                 u
+                 if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL
+                 else u.hex
+             )
+         raise ValueError(f"Cannot bind UUID value {value!r}")
+
+     def process_result_value(
+         self, value: Optional[Union[uuid.UUID, bytes, str]], dialect: Dialect
+     ) -> Optional[uuid.UUID]:
+         if value is None:
+             return None
+         return value if isinstance(value, uuid.UUID) else uuid.UUID(value)
+
+     def coerce_compared_value(self, op: Any, value: Any) -> TypeDecorator:
+         # ensure STR comparisons are coerced through this type
+         return self
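
For orientation, a minimal sketch (table and column names are illustrative, not taken from mlrun's schema) of how these dialect-aware types slot into a SQLAlchemy declarative model:

    import uuid

    import sqlalchemy
    from sqlalchemy.orm import declarative_base

    from mlrun.db.sql_types import Blob, MicroSecondDateTime, Utf8BinText, UuidType

    Base = declarative_base()


    class ExampleRecord(Base):  # hypothetical table, for illustration only
        __tablename__ = "example_records"

        uid = sqlalchemy.Column(UuidType, primary_key=True, default=uuid.uuid4)
        name = sqlalchemy.Column(Utf8BinText)             # utf8_bin / BINARY collation per dialect
        body = sqlalchemy.Column(Blob)                    # MEDIUMBLOB on MySQL, BYTEA on PostgreSQL
        updated = sqlalchemy.Column(MicroSecondDateTime)  # microsecond precision where supported
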
mlrun/frameworks/tf_keras/mlrun_interface.py CHANGED
@@ -280,7 +280,10 @@ class TFKerasMLRunInterface(MLRunInterface, ABC):
          print(f"Horovod worker #{self._hvd.rank()} is using CPU")

          # Adjust learning rate based on the number of GPUs:
-         optimizer.lr = optimizer.lr * self._hvd.size()
+         if hasattr(optimizer, "lr"):
+             optimizer.lr *= self._hvd.size()
+         else:
+             optimizer.learning_rate *= self._hvd.size()

          # Wrap the optimizer in horovod's distributed optimizer: 'hvd.DistributedOptimizer'.
          optimizer = self._hvd.DistributedOptimizer(optimizer)
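
The new branch accounts for Keras versions whose optimizers expose only `learning_rate` and no longer alias it as `lr`. A standalone sketch of the same fallback (Horovod not required; the world size is a stand-in):

    import tensorflow as tf

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    world_size = 4  # stand-in for hvd.size()

    # scale the learning rate through whichever attribute this Keras version exposes
    attr = "lr" if hasattr(optimizer, "lr") else "learning_rate"
    setattr(optimizer, attr, float(getattr(optimizer, attr)) * world_size)
    print(float(optimizer.learning_rate))  # 0.004
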
mlrun/frameworks/tf_keras/model_handler.py CHANGED
@@ -518,7 +518,6 @@ class TFKerasModelHandler(DLModelHandler):
          )

          # Read additional files according to the model format used:
-         # # ModelFormats.SAVED_MODEL - Unzip the SavedModel archive:
          if self._model_format == TFKerasModelHandler.ModelFormats.SAVED_MODEL:
              # Unzip the SavedModel directory:
              with zipfile.ZipFile(self._model_file, "r") as zip_file:
@@ -527,11 +526,18 @@ class TFKerasModelHandler(DLModelHandler):
              self._model_file = os.path.join(
                  os.path.dirname(self._model_file), self._model_name
              )
-         # # ModelFormats.JSON_ARCHITECTURE_H5_WEIGHTS - Get the weights file:
-         elif (
+         elif self._model_format == TFKerasModelHandler.ModelFormats.KERAS:
+             # Rename the model file suffix:
+             self._rename_model_file_suffix(suffix="keras")
+         elif self._model_format == TFKerasModelHandler.ModelFormats.H5:
+             # Rename the model file suffix:
+             self._rename_model_file_suffix(suffix="h5")
+         elif (  # ModelFormats.JSON_ARCHITECTURE_H5_WEIGHTS
              self._model_format
              == TFKerasModelHandler.ModelFormats.JSON_ARCHITECTURE_H5_WEIGHTS
          ):
+             # Rename the model file suffix:
+             self._rename_model_file_suffix(suffix="json")
              # Get the weights file:
              self._weights_file = self._extra_data[
                  self._get_weights_file_artifact_name()
@@ -540,6 +546,20 @@ class TFKerasModelHandler(DLModelHandler):
          # Continue collecting from abstract class:
          super()._collect_files_from_store_object()

+     def _rename_model_file_suffix(self, suffix: str):
+         """
+         Rename the model file suffix to the given one.
+
+         This is used when loading a model from a store object that was saved with a different suffix than the one
+         keras expects, since keras validates the suffix on load. The `artifacts.model.get_model` function downloads
+         the file to a temp file with a `pkl` suffix, so the suffix needs to be replaced.
+
+         :param suffix: The suffix to rename the model file to (without the trailing dot).
+         """
+         new_name = self._model_file.rsplit(".", 1)[0] + f".{suffix}"
+         os.rename(self._model_file, new_name)
+         self._model_file = new_name
+
      def _collect_files_from_local_path(self):
          """
          If the model path given is of a local path, search for the needed model files and collect them into this handler
mlrun/launcher/base.py CHANGED
@@ -82,7 +82,6 @@ class BaseLauncher(abc.ABC):
          runtime: "mlrun.runtimes.base.BaseRuntime",
          project_name: Optional[str] = "",
          full: bool = True,
-         client_version: str = "",
      ):
          pass

mlrun/launcher/client.py CHANGED
@@ -36,7 +36,6 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
          runtime: "mlrun.runtimes.base.BaseRuntime",
          project_name: Optional[str] = "",
          full: bool = True,
-         client_version: str = "",
      ):
          runtime.try_auto_mount_based_on_config()
          runtime._fill_credentials()
mlrun/launcher/local.py CHANGED
@@ -13,7 +13,6 @@
  # limitations under the License.
  import os
  import pathlib
- from os import environ
  from typing import Callable, Optional, Union

  import mlrun.common.constants as mlrun_constants
@@ -252,9 +251,6 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
          # copy the code/base-spec to the local function (for the UI and code logging)
          fn.spec.description = runtime.spec.description
          fn.spec.build = runtime.spec.build
-         serving_spec = getattr(runtime.spec, "serving_spec", None)
-         if serving_spec:
-             environ["SERVING_SPEC_ENV"] = serving_spec

          run.spec.handler = handler
          run.spec.reset_on_run = reset_on_run
mlrun/model_monitoring/applications/base.py CHANGED
@@ -166,13 +166,29 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          return result

      @staticmethod
+     def _check_writer_is_up(project: "mlrun.MlrunProject") -> None:
+         try:
+             project.get_function(
+                 mm_constants.MonitoringFunctionNames.WRITER, ignore_cache=True
+             )
+         except mlrun.errors.MLRunNotFoundError:
+             raise mlrun.errors.MLRunValueError(
+                 "Writing outputs to the databases is blocked as the model monitoring infrastructure is disabled.\n"
+                 "To unblock, enable model monitoring with `project.enable_model_monitoring()`."
+             )
+
+     @classmethod
      @contextmanager
      def _push_to_writer(
+         cls,
          *,
          write_output: bool,
          stream_profile: Optional[ds_profile.DatastoreProfile],
+         project: "mlrun.MlrunProject",
      ) -> Iterator[dict[str, list[tuple]]]:
          endpoints_output: dict[str, list[tuple]] = defaultdict(list)
+         if write_output:
+             cls._check_writer_is_up(project)
          try:
              yield endpoints_output
          finally:
@@ -220,6 +236,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          for an MLRun job.
          This method should not be called directly.
          """
+         project = context.get_project_object()
+         if not project:
+             raise mlrun.errors.MLRunValueError("Could not load project from context")

          if write_output and (
              not endpoints or sample_data is not None or reference_data is not None
@@ -236,7 +255,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          )

          with self._push_to_writer(
-             write_output=write_output, stream_profile=stream_profile
+             write_output=write_output, stream_profile=stream_profile, project=project
          ) as endpoints_output:

              def call_do_tracking(event: Optional[dict] = None):
@@ -249,6 +268,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                  event=event,
                  application_name=self.__class__.__name__,
                  context=context,
+                 project=project,
                  sample_df=sample_data,
                  feature_stats=feature_stats,
              )
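
In user terms, the new guard means writing application outputs requires the model monitoring writer function to exist in the project. A hedged sketch of the setup it expects (arguments to `enable_model_monitoring` left at their defaults):

    import mlrun

    project = mlrun.get_or_create_project("my-project", context="./")

    # deploys the monitoring infrastructure, including the writer function,
    # so application runs with write_output=True are not blocked
    project.enable_model_monitoring()
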
mlrun/model_monitoring/applications/context.py CHANGED
@@ -137,13 +137,14 @@ class MonitoringApplicationContext:
          cls,
          context: "mlrun.MLClientCtx",
          *,
+         project: Optional["mlrun.MlrunProject"] = None,
          application_name: str,
          event: dict[str, Any],
          model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
          sample_df: Optional[pd.DataFrame] = None,
          feature_stats: Optional[FeatureStats] = None,
      ) -> "MonitoringApplicationContext":
-         project = context.get_project_object()
+         project = project or context.get_project_object()
          if not project:
              raise mlrun.errors.MLRunValueError("Could not load project from context")
          logger = context.logger
mlrun/projects/__init__.py CHANGED
@@ -32,6 +32,7 @@ from .pipelines import (
      load_and_run_workflow,
      load_and_run,
      pipeline_context,
+     rerun_workflow,
  )  # noqa
  from .project import (
      MlrunProject,
mlrun/projects/pipelines.py CHANGED
@@ -1070,6 +1070,40 @@ def github_webhook(request):
      return {"msg": "pushed"}


+ def rerun_workflow(
+     context: mlrun.execution.MLClientCtx, run_uid: str, project_name: str
+ ):
+     """
+     Re-run a workflow by retrying a previously failed KFP pipeline.
+
+     :param context: MLRun context.
+     :param run_uid: The run UID of the original workflow to retry.
+     :param project_name: The project name.
+     """
+
+     try:
+         # TODO in followups: handle start and running notifications
+
+         # Retry the pipeline - TODO: add submit-direct flag when created
+         db = mlrun.get_run_db()
+         new_pipeline_id = db.retry_pipeline(run_uid, project_name)
+
+         # Store result for observability
+         context.set_label("workflow-id", new_pipeline_id)
+         context.log_result("workflow_id", new_pipeline_id)
+
+         # wait for pipeline completion so monitor will push terminal notifications
+         wait_for_pipeline_completion(
+             new_pipeline_id,
+             project=project_name,
+         )
+
+     # Temporary exception
+     except Exception as exc:
+         context.logger.error("Failed to rerun workflow", exc=err_to_str(exc))
+         raise
+
+
  def load_and_run(context, *args, **kwargs):
      """
      This function serves as an alias to `load_and_run_workflow`,
@@ -1153,6 +1187,7 @@ def load_and_run_workflow(
      project = mlrun.get_or_create_project(
          context=project_context or f"./{project_name}",
          name=project_name,
+         allow_cross_project=True,
      )

      # extract "start" notification if exists
@@ -1245,6 +1280,7 @@ pull_remote_project_files(
              subpath=subpath,
              clone=clone,
              save=False,
+             allow_cross_project=True,
          )
      except Exception as error:
          notify_scheduled_workflow_failure(
mlrun/projects/project.py CHANGED
@@ -2961,19 +2961,6 @@ class MlrunProject(ModelObj):
          mlrun.db.get_run_db().delete_function(name=name, project=self.metadata.name)
          self.spec.remove_function(name)

-     def remove_model_monitoring_function(self, name: Union[str, list[str]]):
-         """delete the specified model-monitoring-app function/s
-
-         :param name: name of the model-monitoring-function/s (under the project)
-         """
-         # TODO: Remove this in 1.10.0
-         warnings.warn(
-             "'remove_model_monitoring_function' is deprecated in 1.7.0 and will be removed in 1.10.0. "
-             "Please use `delete_model_monitoring_function` instead.",
-             FutureWarning,
-         )
-         self.delete_model_monitoring_function(name)
-
      def delete_model_monitoring_function(self, name: Union[str, list[str]]):
          """delete the specified model-monitoring-app function/s

mlrun/runtimes/daskjob.py CHANGED
@@ -92,7 +92,6 @@ class DaskSpec(KubeResourceSpec):
          preemption_mode=None,
          security_context=None,
          state_thresholds=None,
-         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -122,7 +121,6 @@ class DaskSpec(KubeResourceSpec):
              preemption_mode=preemption_mode,
              security_context=security_context,
              state_thresholds=state_thresholds,
-             serving_spec=serving_spec,
          )
          self.args = args

mlrun/runtimes/kubejob.py CHANGED
@@ -207,7 +207,3 @@ class KubejobRuntime(KubeResource):
          raise NotImplementedError(
              f"Running a {self.kind} function from the client is not supported. Use .run() to submit the job to the API."
          )
-
-     @property
-     def serving_spec(self):
-         return self.spec.serving_spec