mlrun 1.10.0rc8__py3-none-any.whl → 1.10.0rc10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/constants.py +1 -0
- mlrun/common/db/dialects.py +25 -0
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/function.py +1 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +8 -0
- mlrun/common/schemas/partition.py +13 -3
- mlrun/common/schemas/workflow.py +7 -0
- mlrun/datastore/utils.py +0 -1
- mlrun/db/__init__.py +1 -0
- mlrun/db/base.py +17 -0
- mlrun/db/nopdb.py +9 -0
- mlrun/db/sql_types.py +160 -0
- mlrun/frameworks/tf_keras/mlrun_interface.py +4 -1
- mlrun/frameworks/tf_keras/model_handler.py +23 -3
- mlrun/launcher/base.py +0 -1
- mlrun/launcher/client.py +0 -1
- mlrun/launcher/local.py +0 -4
- mlrun/model_monitoring/applications/base.py +21 -1
- mlrun/model_monitoring/applications/context.py +2 -1
- mlrun/projects/__init__.py +1 -0
- mlrun/projects/pipelines.py +36 -0
- mlrun/projects/project.py +0 -13
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/kubejob.py +0 -4
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/nuclio/function.py +0 -2
- mlrun/runtimes/nuclio/serving.py +0 -46
- mlrun/runtimes/pod.py +0 -3
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/routers.py +17 -13
- mlrun/serving/server.py +3 -97
- mlrun/serving/system_steps.py +2 -1
- mlrun/serving/v2_serving.py +2 -2
- mlrun/utils/helpers.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/METADATA +15 -12
- {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/RECORD +43 -42
- {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/licenses/LICENSE +1 -1
- mlrun/common/db/sql_session.py +0 -79
- {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc8.dist-info → mlrun-1.10.0rc10.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py
CHANGED
mlrun/common/db/dialects.py
ADDED
@@ -0,0 +1,25 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import mlrun.common.types
+
+
+class Dialects(mlrun.common.types.StrEnum):
+    MYSQL = "mysql"
+    POSTGRESQL = "postgresql"
+    SQLITE = "sqlite"
+
+    @classmethod
+    def all(cls) -> list[str]:
+        """Return all dialects as a list of strings."""
+        return [dialect.value for dialect in cls]
mlrun/common/schemas/__init__.py
CHANGED
mlrun/common/schemas/function.py
CHANGED
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -15,7 +15,9 @@ import abc
 import json
 from datetime import datetime
 from typing import Any, NamedTuple, Optional, TypeVar
+from uuid import UUID

+from pydantic.v1 import validator
 from pydantic.v1 import BaseModel, Field, constr

 # TODO: remove the unused import below after `mlrun.datastore` and `mlrun.utils` usage is removed.
@@ -121,6 +123,12 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     def mutable_fields(cls):
         return ["labels"]

+    @validator("uid", pre=True)
+    def _uid_to_str(cls, v):  # noqa: N805
+        if isinstance(v, UUID):
+            return str(v)
+        return v
+

 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     model_class: Optional[str] = ""
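Because the validator uses `pre=True`, it runs before type coercion and can accept `uuid.UUID` inputs for a `str` field. A standalone sketch of the same pattern using Pydantic v1 compat imports; the model and field here are illustrative, not mlrun's:

from typing import Optional
from uuid import UUID, uuid4

from pydantic.v1 import BaseModel, validator


class Metadata(BaseModel):
    uid: Optional[str] = None

    @validator("uid", pre=True)
    def _uid_to_str(cls, v):  # noqa: N805
        # pre=True runs before type validation, so a UUID instance is
        # coerced to the declared str type instead of failing validation.
        if isinstance(v, UUID):
            return str(v)
        return v


print(Metadata(uid=uuid4()).uid)  # a plain hyphenated string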
mlrun/common/schemas/partition.py
CHANGED
@@ -14,10 +14,10 @@

 from datetime import datetime, timedelta

-from mlrun.common.types import StrEnum
+import mlrun.common.types


-class PartitionInterval(StrEnum):
+class PartitionInterval(mlrun.common.types.StrEnum):
     DAY = "DAY"
     MONTH = "MONTH"
     YEARWEEK = "YEARWEEK"
@@ -44,6 +44,8 @@ class PartitionInterval(StrEnum):
             return timedelta(days=30)
         elif self == PartitionInterval.YEARWEEK:
             return timedelta(weeks=1)
+        else:
+            raise ValueError(f"Unsupported PartitionInterval: {self}")

     @classmethod
     def from_expression(cls, partition_expression: str):
@@ -83,7 +85,7 @@ class PartitionInterval(StrEnum):
         current_datetime = start_datetime

         for _ in range(partition_number):
-            partition_name = self.get_partition_name(current_datetime)
+            partition_name = f"p{self.get_partition_name(current_datetime)}"
             partition_boundary_date = self.get_next_partition_time(current_datetime)
             partition_value = self.get_partition_name(partition_boundary_date)
             partitioning_information_list.append((partition_name, partition_value))
@@ -109,6 +111,8 @@ class PartitionInterval(StrEnum):
             return (current_datetime.replace(day=1) + timedelta(days=32)).replace(day=1)
         elif self == PartitionInterval.YEARWEEK:
             return current_datetime + timedelta(weeks=1)
+        else:
+            raise ValueError(f"Unsupported PartitionInterval: {self}")

     def get_partition_name(self, current_datetime: datetime) -> str:
         if self == PartitionInterval.DAY:
@@ -118,6 +122,8 @@ class PartitionInterval(StrEnum):
         elif self == PartitionInterval.YEARWEEK:
             year, week, _ = current_datetime.isocalendar()
             return f"{year}{week:02d}"
+        else:
+            raise ValueError(f"Unsupported PartitionInterval: {self}")

     def get_partition_expression(self, column_name: str):
         if self == PartitionInterval.YEARWEEK:
@@ -130,6 +136,8 @@ class PartitionInterval(StrEnum):
             # generates value in format %Y%m in mysql
             # mysql query example: `select YEAR(NOW())*100 + MONTH(NOW());`
             return f"YEAR({column_name}) * 100 + MONTH({column_name})"
+        else:
+            raise ValueError(f"Unsupported PartitionInterval: {self}")

     def get_number_of_partitions(self, days: int) -> int:
         # Calculate the number partitions based on given number of days
@@ -140,3 +148,5 @@ class PartitionInterval(StrEnum):
             return int(days / 30.44)
         elif self == PartitionInterval.YEARWEEK:
             return int(days / 7)
+        else:
+            raise ValueError(f"Unsupported PartitionInterval: {self}")
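The notable behavioral change is the `p` prefix: partitions now get identifier-style names (e.g. `p202502`) while the boundary value stays numeric. A quick standalone illustration of the `YEARWEEK` naming logic shown above (a hypothetical helper, not mlrun's code):

from datetime import datetime, timedelta


def yearweek_name(dt: datetime) -> str:
    # ISO year plus zero-padded ISO week, e.g. 2025 week 2 -> "202502"
    year, week, _ = dt.isocalendar()
    return f"{year}{week:02d}"


dt = datetime(2025, 1, 6)  # a Monday in ISO week 2 of 2025
partition_name = f"p{yearweek_name(dt)}"                  # identifier-style name
partition_value = yearweek_name(dt + timedelta(weeks=1))  # numeric boundary
print(partition_name, partition_value)  # p202502 202503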
mlrun/common/schemas/workflow.py
CHANGED
@@ -46,6 +46,13 @@ class WorkflowRequest(pydantic.v1.BaseModel):
     notifications: typing.Optional[list[Notification]] = None


+class RerunWorkflowRequest(pydantic.v1.BaseModel):
+    run_name: typing.Optional[str] = None
+    run_id: typing.Optional[str] = None
+    notifications: typing.Optional[list[Notification]] = None
+    workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
+
+
 class WorkflowResponse(pydantic.v1.BaseModel):
     project: str = None
     name: str = None
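A minimal usage sketch of the new request model, assuming standard Pydantic v1 behavior (the values are placeholders):

from mlrun.common.schemas.workflow import RerunWorkflowRequest

# Every field is optional, so a partial rerun request validates cleanly.
req = RerunWorkflowRequest(
    run_id="abc123",
    workflow_runner_node_selector={"kubernetes.io/os": "linux"},
)
print(req.dict(exclude_none=True))
# {'run_id': 'abc123', 'workflow_runner_node_selector': {'kubernetes.io/os': 'linux'}}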
mlrun/datastore/utils.py
CHANGED
mlrun/db/__init__.py
CHANGED
mlrun/db/base.py
CHANGED
@@ -638,6 +638,16 @@ class RunDBInterface(ABC):
     ):
         pass

+    @abstractmethod
+    def retry_pipeline(
+        self,
+        run_id: str,
+        project: str,
+        namespace: Optional[str] = None,
+        timeout: int = 30,
+    ):
+        pass
+
     @abstractmethod
     def list_project_secrets(
         self,
@@ -1034,6 +1044,13 @@ class RunDBInterface(ABC):
     ):
         pass

+    def get_project_background_task(
+        self,
+        project: str,
+        name: str,
+    ) -> mlrun.common.schemas.BackgroundTask:
+        pass
+
     @abstractmethod
     def submit_workflow(
         self,
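Callers reach the new method through the run-DB handle; a hedged sketch, assuming a concrete `RunDBInterface` implementation (such as the HTTP client) provides the abstract `retry_pipeline` declared above, with placeholder IDs:

import mlrun

# Retry a previously failed KFP pipeline run; the server resolves the
# original run by its UID within the given project.
db = mlrun.get_run_db()
new_pipeline_id = db.retry_pipeline("original-run-uid", project="my-project")
print(new_pipeline_id)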
mlrun/db/nopdb.py
CHANGED
@@ -524,6 +524,15 @@ class NopDB(RunDBInterface):
     ):
         pass

+    def retry_pipeline(
+        self,
+        run_id: str,
+        project: str,
+        namespace: Optional[str] = None,
+        timeout: int = 30,
+    ):
+        pass
+
     def list_pipelines(
         self,
         project: str,
mlrun/db/sql_types.py
ADDED
@@ -0,0 +1,160 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This module provides SQLAlchemy TypeDecorator subclasses that are aware of
+database dialects (MySQL, PostgreSQL, SQLite) and automatically select
+appropriate native types (e.g., UUID, BLOB, TIMESTAMP with precision) or
+fallbacks (e.g., hex-string storage) to ensure consistent behavior across
+different database backends.
+"""
+
+import uuid
+from typing import Any, Optional, Union
+
+import sqlalchemy.types
+from sqlalchemy import CHAR, Text
+from sqlalchemy.dialects.mysql import DATETIME as MYSQL_DATETIME
+from sqlalchemy.dialects.mysql import MEDIUMBLOB
+from sqlalchemy.dialects.postgresql import BYTEA
+from sqlalchemy.dialects.postgresql import TIMESTAMP as PG_TIMESTAMP
+from sqlalchemy.dialects.postgresql import UUID as PG_UUID
+from sqlalchemy.engine.interfaces import Dialect
+from sqlalchemy.types import TypeDecorator
+
+import mlrun.common.db.dialects
+
+
+class DateTime(TypeDecorator):
+    impl = sqlalchemy.types.DateTime
+    cache_ok = True
+    precision: int = 3
+
+    def load_dialect_impl(
+        self,
+        dialect: Dialect,
+    ) -> sqlalchemy.types.TypeEngine:
+        if dialect.name == mlrun.common.db.dialects.Dialects.MYSQL:
+            return dialect.type_descriptor(
+                MYSQL_DATETIME(
+                    fsp=self.precision,
+                    timezone=True,
+                )
+            )
+        if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL:
+            return dialect.type_descriptor(
+                PG_TIMESTAMP(
+                    precision=self.precision,
+                    timezone=True,
+                )
+            )
+        return dialect.type_descriptor(sqlalchemy.types.DateTime)
+
+
+class MicroSecondDateTime(DateTime):
+    cache_ok = True
+    precision: int = 6
+
+
+class Blob(TypeDecorator):
+    impl = sqlalchemy.types.LargeBinary
+    cache_ok = True
+
+    def load_dialect_impl(
+        self,
+        dialect: Dialect,
+    ) -> sqlalchemy.types.TypeEngine:
+        if dialect.name == mlrun.common.db.dialects.Dialects.MYSQL:
+            return dialect.type_descriptor(MEDIUMBLOB)
+        if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL:
+            return dialect.type_descriptor(BYTEA)
+        return dialect.type_descriptor(self.impl)
+
+
+class Utf8BinText(TypeDecorator):
+    impl = Text
+    cache_ok = True
+
+    def load_dialect_impl(
+        self,
+        dialect: Dialect,
+    ) -> sqlalchemy.types.TypeEngine:
+        if dialect.name == mlrun.common.db.dialects.Dialects.MYSQL:
+            return dialect.type_descriptor(
+                sqlalchemy.dialects.mysql.VARCHAR(
+                    collation="utf8_bin",
+                    length=255,
+                )
+            )
+        if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL:
+            # This collation is created as part of the database creation
+            return dialect.type_descriptor(
+                Text(
+                    collation="utf8_bin",
+                )
+            )
+        if dialect.name == mlrun.common.db.dialects.Dialects.SQLITE:
+            return dialect.type_descriptor(
+                Text(
+                    collation="BINARY",
+                )
+            )
+        return dialect.type_descriptor(self.impl)
+
+
+class UuidType(TypeDecorator):
+    """
+    A UUID type which stores as native UUID on Postgres (as_uuid=True)
+    and as 32-char hex strings on other dialects.
+    """
+
+    impl = CHAR(32)
+    cache_ok = True
+
+    def load_dialect_impl(self, dialect: Dialect) -> sqlalchemy.types.TypeEngine:
+        if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL:
+            return dialect.type_descriptor(PG_UUID(as_uuid=True))
+        return dialect.type_descriptor(CHAR(32))
+
+    def process_bind_param(
+        self,
+        value: Optional[Union[uuid.UUID, str]],
+        dialect: Dialect,
+    ) -> Optional[Union[uuid.UUID, str]]:
+        if value is None:
+            return None
+        if isinstance(value, uuid.UUID):
+            return (
+                value
+                if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL
+                else value.hex
+            )
+        if isinstance(value, str):
+            u = uuid.UUID(value)
+            return (
+                u
+                if dialect.name == mlrun.common.db.dialects.Dialects.POSTGRESQL
+                else u.hex
+            )
+        raise ValueError(f"Cannot bind UUID value {value!r}")
+
+    def process_result_value(
+        self, value: Optional[Union[uuid.UUID, bytes, str]], dialect: Dialect
+    ) -> Optional[uuid.UUID]:
+        if value is None:
+            return None
+        return value if isinstance(value, uuid.UUID) else uuid.UUID(value)
+
+    def coerce_compared_value(self, op: Any, value: Any) -> TypeDecorator:
+        # ensure str comparisons are coerced through this type
+        return self
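A short sketch of how these decorators might be wired into a SQLAlchemy model; the table and column names are invented for illustration:

import uuid

import sqlalchemy
from sqlalchemy.orm import declarative_base

from mlrun.db.sql_types import MicroSecondDateTime, UuidType

Base = declarative_base()


class ExampleRun(Base):
    __tablename__ = "example_runs"

    # Native UUID on PostgreSQL, 32-char hex in CHAR(32) on MySQL/SQLite.
    id = sqlalchemy.Column(UuidType, primary_key=True, default=uuid.uuid4)
    # DATETIME(6) on MySQL, TIMESTAMP(6) on PostgreSQL, generic DateTime otherwise.
    updated = sqlalchemy.Column(MicroSecondDateTime)


# Thanks to coerce_compared_value/process_bind_param, filters may compare the
# column against either a uuid.UUID or its string form:
# session.query(ExampleRun).filter(ExampleRun.id == uuid.uuid4())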
mlrun/frameworks/tf_keras/mlrun_interface.py
CHANGED
@@ -280,7 +280,10 @@ class TFKerasMLRunInterface(MLRunInterface, ABC):
             print(f"Horovod worker #{self._hvd.rank()} is using CPU")

         # Adjust learning rate based on the number of GPUs:
-        optimizer.lr *= self._hvd.size()
+        if hasattr(optimizer, "lr"):
+            optimizer.lr *= self._hvd.size()
+        else:
+            optimizer.learning_rate *= self._hvd.size()

         # Wrap the optimizer in horovod's distributed optimizer: 'hvd.DistributedOptimizer'.
         optimizer = self._hvd.DistributedOptimizer(optimizer)
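The branch exists because the optimizer attribute was renamed across Keras versions (`lr` in older tf.keras optimizers, `learning_rate` in newer ones). A toy sketch of the same compatibility pattern, with a stand-in optimizer object:

def scale_learning_rate(optimizer, world_size: int) -> None:
    # Prefer the legacy `lr` attribute when present; fall back to the
    # newer `learning_rate` name otherwise.
    if hasattr(optimizer, "lr"):
        optimizer.lr *= world_size
    else:
        optimizer.learning_rate *= world_size


class FakeOptimizer:
    learning_rate = 0.001


opt = FakeOptimizer()
scale_learning_rate(opt, world_size=4)
print(opt.learning_rate)  # 0.004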
mlrun/frameworks/tf_keras/model_handler.py
CHANGED
@@ -518,7 +518,6 @@ class TFKerasModelHandler(DLModelHandler):
         )

         # Read additional files according to the model format used:
-        # # ModelFormats.SAVED_MODEL - Unzip the SavedModel archive:
         if self._model_format == TFKerasModelHandler.ModelFormats.SAVED_MODEL:
             # Unzip the SavedModel directory:
             with zipfile.ZipFile(self._model_file, "r") as zip_file:
@@ -527,11 +526,18 @@ class TFKerasModelHandler(DLModelHandler):
             self._model_file = os.path.join(
                 os.path.dirname(self._model_file), self._model_name
             )
-
-        if (
+        elif self._model_format == TFKerasModelHandler.ModelFormats.KERAS:
+            # Rename the model file suffix:
+            self._rename_model_file_suffix(suffix="keras")
+        elif self._model_format == TFKerasModelHandler.ModelFormats.H5:
+            # Rename the model file suffix:
+            self._rename_model_file_suffix(suffix="h5")
+        elif (  # ModelFormats.JSON_ARCHITECTURE_H5_WEIGHTS
             self._model_format
             == TFKerasModelHandler.ModelFormats.JSON_ARCHITECTURE_H5_WEIGHTS
         ):
+            # Rename the model file suffix:
+            self._rename_model_file_suffix(suffix="json")
             # Get the weights file:
             self._weights_file = self._extra_data[
                 self._get_weights_file_artifact_name()
@@ -540,6 +546,20 @@ class TFKerasModelHandler(DLModelHandler):
         # Continue collecting from abstract class:
         super()._collect_files_from_store_object()

+    def _rename_model_file_suffix(self, suffix: str):
+        """
+        Rename the model file suffix to the given one.
+
+        This is used when loading a model from a store object that was saved with a different suffix than the one
+        keras expects, since keras validates the suffix on load. The `artifacts.model.get_model` function downloads
+        the file to a temp file with a `pkl` suffix, so it needs to be replaced.
+
+        :param suffix: The suffix to rename the model file to (without the leading dot).
+        """
+        new_name = self._model_file.rsplit(".", 1)[0] + f".{suffix}"
+        os.rename(self._model_file, new_name)
+        self._model_file = new_name
+
     def _collect_files_from_local_path(self):
         """
         If the model path given is of a local path, search for the needed model files and collect them into this handler
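The rename itself is simple string surgery plus `os.rename`; a standalone sketch of the same behavior (a hypothetical helper, with invented temp paths):

import os
import tempfile


def rename_suffix(path: str, suffix: str) -> str:
    # Swap everything after the last "." for the suffix keras expects,
    # mirroring `_rename_model_file_suffix` above.
    new_path = path.rsplit(".", 1)[0] + f".{suffix}"
    os.rename(path, new_path)
    return new_path


with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "model.pkl")
    open(path, "w").close()
    print(os.path.basename(rename_suffix(path, "keras")))  # model.keras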
mlrun/launcher/base.py
CHANGED
mlrun/launcher/client.py
CHANGED
@@ -36,7 +36,6 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         runtime: "mlrun.runtimes.base.BaseRuntime",
         project_name: Optional[str] = "",
         full: bool = True,
-        client_version: str = "",
     ):
         runtime.try_auto_mount_based_on_config()
         runtime._fill_credentials()
mlrun/launcher/local.py
CHANGED
@@ -13,7 +13,6 @@
 # limitations under the License.
 import os
 import pathlib
-from os import environ
 from typing import Callable, Optional, Union

 import mlrun.common.constants as mlrun_constants
@@ -252,9 +251,6 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
         # copy the code/base-spec to the local function (for the UI and code logging)
         fn.spec.description = runtime.spec.description
         fn.spec.build = runtime.spec.build
-        serving_spec = getattr(runtime.spec, "serving_spec", None)
-        if serving_spec:
-            environ["SERVING_SPEC_ENV"] = serving_spec

         run.spec.handler = handler
         run.spec.reset_on_run = reset_on_run
mlrun/model_monitoring/applications/base.py
CHANGED
@@ -166,13 +166,29 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         return result

     @staticmethod
+    def _check_writer_is_up(project: "mlrun.MlrunProject") -> None:
+        try:
+            project.get_function(
+                mm_constants.MonitoringFunctionNames.WRITER, ignore_cache=True
+            )
+        except mlrun.errors.MLRunNotFoundError:
+            raise mlrun.errors.MLRunValueError(
+                "Writing outputs to the databases is blocked as the model monitoring infrastructure is disabled.\n"
+                "To unblock, enable model monitoring with `project.enable_model_monitoring()`."
+            )
+
+    @classmethod
     @contextmanager
     def _push_to_writer(
+        cls,
         *,
         write_output: bool,
         stream_profile: Optional[ds_profile.DatastoreProfile],
+        project: "mlrun.MlrunProject",
     ) -> Iterator[dict[str, list[tuple]]]:
         endpoints_output: dict[str, list[tuple]] = defaultdict(list)
+        if write_output:
+            cls._check_writer_is_up(project)
         try:
             yield endpoints_output
         finally:
@@ -220,6 +236,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         for an MLRun job.
         This method should not be called directly.
         """
+        project = context.get_project_object()
+        if not project:
+            raise mlrun.errors.MLRunValueError("Could not load project from context")

         if write_output and (
             not endpoints or sample_data is not None or reference_data is not None
@@ -236,7 +255,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         )

         with self._push_to_writer(
-            write_output=write_output, stream_profile=stream_profile
+            write_output=write_output, stream_profile=stream_profile, project=project
         ) as endpoints_output:

             def call_do_tracking(event: Optional[dict] = None):
@@ -249,6 +268,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                 event=event,
                 application_name=self.__class__.__name__,
                 context=context,
+                project=project,
                 sample_df=sample_data,
                 feature_stats=feature_stats,
             )
mlrun/model_monitoring/applications/context.py
CHANGED
@@ -137,13 +137,14 @@ class MonitoringApplicationContext:
         cls,
         context: "mlrun.MLClientCtx",
         *,
+        project: Optional["mlrun.MlrunProject"] = None,
         application_name: str,
         event: dict[str, Any],
         model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
         sample_df: Optional[pd.DataFrame] = None,
         feature_stats: Optional[FeatureStats] = None,
     ) -> "MonitoringApplicationContext":
-        project = context.get_project_object()
+        project = project or context.get_project_object()
         if not project:
             raise mlrun.errors.MLRunValueError("Could not load project from context")
         logger = context.logger
mlrun/projects/__init__.py
CHANGED
mlrun/projects/pipelines.py
CHANGED
@@ -1070,6 +1070,40 @@ def github_webhook(request):
     return {"msg": "pushed"}


+def rerun_workflow(
+    context: mlrun.execution.MLClientCtx, run_uid: str, project_name: str
+):
+    """
+    Re-run a workflow by retrying a previously failed KFP pipeline.
+
+    :param context:      MLRun context.
+    :param run_uid:      The run UID of the original workflow to retry.
+    :param project_name: The project name.
+    """
+
+    try:
+        # TODO in followups: handle start and running notifications
+
+        # Retry the pipeline - TODO: add submit-direct flag when created
+        db = mlrun.get_run_db()
+        new_pipeline_id = db.retry_pipeline(run_uid, project_name)
+
+        # Store result for observability
+        context.set_label("workflow-id", new_pipeline_id)
+        context.log_result("workflow_id", new_pipeline_id)
+
+        # wait for pipeline completion so monitor will push terminal notifications
+        wait_for_pipeline_completion(
+            new_pipeline_id,
+            project=project_name,
+        )
+
+    # Temporary exception
+    except Exception as exc:
+        context.logger.error("Failed to rerun workflow", exc=err_to_str(exc))
+        raise
+
+
 def load_and_run(context, *args, **kwargs):
     """
     This function serves as an alias to `load_and_run_workflow`,
@@ -1153,6 +1187,7 @@ def load_and_run_workflow(
     project = mlrun.get_or_create_project(
         context=project_context or f"./{project_name}",
         name=project_name,
+        allow_cross_project=True,
     )

     # extract "start" notification if exists
@@ -1245,6 +1280,7 @@ def pull_remote_project_files(
             subpath=subpath,
             clone=clone,
             save=False,
+            allow_cross_project=True,
         )
     except Exception as error:
         notify_scheduled_workflow_failure(
mlrun/projects/project.py
CHANGED
@@ -2961,19 +2961,6 @@ class MlrunProject(ModelObj):
         mlrun.db.get_run_db().delete_function(name=name, project=self.metadata.name)
         self.spec.remove_function(name)

-    def remove_model_monitoring_function(self, name: Union[str, list[str]]):
-        """delete the specified model-monitoring-app function/s
-
-        :param name: name of the model-monitoring-function/s (under the project)
-        """
-        # TODO: Remove this in 1.10.0
-        warnings.warn(
-            "'remove_model_monitoring_function' is deprecated in 1.7.0 and will be removed in 1.10.0. "
-            "Please use `delete_model_monitoring_function` instead.",
-            FutureWarning,
-        )
-        self.delete_model_monitoring_function(name)
-
     def delete_model_monitoring_function(self, name: Union[str, list[str]]):
         """delete the specified model-monitoring-app function/s

mlrun/runtimes/daskjob.py
CHANGED
@@ -92,7 +92,6 @@ class DaskSpec(KubeResourceSpec):
         preemption_mode=None,
         security_context=None,
         state_thresholds=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -122,7 +121,6 @@ class DaskSpec(KubeResourceSpec):
             preemption_mode=preemption_mode,
             security_context=security_context,
             state_thresholds=state_thresholds,
-            serving_spec=serving_spec,
         )
         self.args = args
mlrun/runtimes/kubejob.py
CHANGED
@@ -207,7 +207,3 @@ class KubejobRuntime(KubeResource):
         raise NotImplementedError(
             f"Running a {self.kind} function from the client is not supported. Use .run() to submit the job to the API."
         )
-
-    @property
-    def serving_spec(self):
-        return self.spec.serving_spec