mlrun 1.8.0rc12__py3-none-any.whl → 1.8.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/document.py +32 -6
- mlrun/common/formatters/artifact.py +1 -1
- mlrun/common/schemas/partition.py +23 -18
- mlrun/common/types.py +1 -0
- mlrun/config.py +3 -2
- mlrun/datastore/vectorstore.py +69 -26
- mlrun/db/base.py +21 -1
- mlrun/db/httpdb.py +53 -17
- mlrun/db/nopdb.py +12 -1
- mlrun/execution.py +43 -11
- mlrun/model_monitoring/applications/_application_steps.py +1 -1
- mlrun/model_monitoring/applications/base.py +2 -3
- mlrun/model_monitoring/applications/context.py +94 -71
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -21
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -3
- mlrun/projects/pipelines.py +13 -6
- mlrun/projects/project.py +80 -2
- mlrun/runtimes/nuclio/function.py +2 -1
- mlrun/runtimes/nuclio/serving.py +10 -5
- mlrun/serving/routers.py +16 -7
- mlrun/serving/states.py +14 -6
- mlrun/serving/v2_serving.py +11 -6
- mlrun/utils/helpers.py +23 -1
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/webhook.py +13 -12
- mlrun/utils/notifications/notification_pusher.py +18 -23
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc12.dist-info → mlrun-1.8.0rc15.dist-info}/METADATA +14 -8
- {mlrun-1.8.0rc12.dist-info → mlrun-1.8.0rc15.dist-info}/RECORD +33 -33
- {mlrun-1.8.0rc12.dist-info → mlrun-1.8.0rc15.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc12.dist-info → mlrun-1.8.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc12.dist-info → mlrun-1.8.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc12.dist-info → mlrun-1.8.0rc15.dist-info}/top_level.txt +0 -0
mlrun/execution.py
CHANGED
|
@@ -880,7 +880,7 @@ class MLClientCtx:
|
|
|
880
880
|
tag: str = "",
|
|
881
881
|
local_path: str = "",
|
|
882
882
|
artifact_path: Optional[str] = None,
|
|
883
|
-
|
|
883
|
+
document_loader_spec: DocumentLoaderSpec = DocumentLoaderSpec(),
|
|
884
884
|
upload: Optional[bool] = False,
|
|
885
885
|
labels: Optional[dict[str, str]] = None,
|
|
886
886
|
target_path: Optional[str] = None,
|
|
@@ -891,22 +891,48 @@ class MLClientCtx:
|
|
|
891
891
|
|
|
892
892
|
:param key: Artifact key
|
|
893
893
|
:param tag: Version tag
|
|
894
|
-
:param local_path:
|
|
895
|
-
|
|
896
|
-
:param artifact_path:
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
:param
|
|
894
|
+
:param local_path: path to the local file we upload, will also be used
|
|
895
|
+
as the destination subpath (under "artifact_path")
|
|
896
|
+
:param artifact_path: Target artifact path (when not using the default)
|
|
897
|
+
to define a subpath under the default location use:
|
|
898
|
+
`artifact_path=context.artifact_subpath('data')`
|
|
899
|
+
:param document_loader_spec: Spec to use to load the artifact as langchain document.
|
|
900
|
+
|
|
901
|
+
By default, uses DocumentLoaderSpec() which initializes with:
|
|
902
|
+
|
|
903
|
+
* loader_class_name="langchain_community.document_loaders.TextLoader"
|
|
904
|
+
* src_name="file_path"
|
|
905
|
+
* kwargs=None
|
|
906
|
+
|
|
907
|
+
Can be customized for different document types, e.g.::
|
|
908
|
+
|
|
909
|
+
DocumentLoaderSpec(
|
|
910
|
+
loader_class_name="langchain_community.document_loaders.PDFLoader",
|
|
911
|
+
src_name="file_path",
|
|
912
|
+
kwargs={"extract_images": True}
|
|
913
|
+
)
|
|
900
914
|
:param upload: Whether to upload the artifact
|
|
901
915
|
:param labels: Key-value labels
|
|
902
916
|
:param target_path: Path to the local file
|
|
903
917
|
:param kwargs: Additional keyword arguments
|
|
904
918
|
:return: DocumentArtifact object
|
|
919
|
+
|
|
920
|
+
Example:
|
|
921
|
+
>>> # Log a PDF document with custom loader
|
|
922
|
+
>>> context.log_document(
|
|
923
|
+
... key="my_doc",
|
|
924
|
+
... local_path="path/to/doc.pdf",
|
|
925
|
+
... document_loader_spec=DocumentLoaderSpec(
|
|
926
|
+
... loader_class_name="langchain_community.document_loaders.PDFLoader",
|
|
927
|
+
... src_name="file_path",
|
|
928
|
+
... kwargs={"extract_images": True},
|
|
929
|
+
... ),
|
|
930
|
+
... )
|
|
905
931
|
"""
|
|
906
932
|
doc_artifact = DocumentArtifact(
|
|
907
933
|
key=key,
|
|
908
934
|
original_source=local_path or target_path,
|
|
909
|
-
|
|
935
|
+
document_loader_spec=document_loader_spec,
|
|
910
936
|
**kwargs,
|
|
911
937
|
)
|
|
912
938
|
|
|
@@ -929,9 +955,15 @@ class MLClientCtx:
|
|
|
929
955
|
)
|
|
930
956
|
return self.get_artifact(key)
|
|
931
957
|
|
|
932
|
-
def get_artifact(
|
|
933
|
-
|
|
934
|
-
|
|
958
|
+
def get_artifact(
|
|
959
|
+
self, key, tag=None, iter=None, tree=None, uid=None
|
|
960
|
+
) -> Optional[Artifact]:
|
|
961
|
+
if tag or iter or tree or uid:
|
|
962
|
+
project = self.get_project_object()
|
|
963
|
+
return project.get_artifact(key=key, tag=tag, iter=iter, tree=tree, uid=uid)
|
|
964
|
+
else:
|
|
965
|
+
artifact_uri = self._artifacts_manager.artifact_uris[key]
|
|
966
|
+
return self.get_store_resource(artifact_uri)
|
|
935
967
|
|
|
936
968
|
def update_artifact(self, artifact_object: Artifact):
|
|
937
969
|
"""Update an artifact object in the DB and the cached uri"""
|
|
@@ -136,7 +136,7 @@ class _PrepareMonitoringEvent(StepToDict):
|
|
|
136
136
|
:param event: Application event.
|
|
137
137
|
:return: Application context.
|
|
138
138
|
"""
|
|
139
|
-
application_context = MonitoringApplicationContext(
|
|
139
|
+
application_context = MonitoringApplicationContext._from_graph_ctx(
|
|
140
140
|
application_name=self.application_name,
|
|
141
141
|
event=event,
|
|
142
142
|
model_endpoint_dict=self.model_endpoints,
|
|
@@ -112,11 +112,10 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
112
112
|
def call_do_tracking(event: Optional[dict] = None):
|
|
113
113
|
if event is None:
|
|
114
114
|
event = {}
|
|
115
|
-
monitoring_context = mm_context.MonitoringApplicationContext(
|
|
115
|
+
monitoring_context = mm_context.MonitoringApplicationContext._from_ml_ctx(
|
|
116
116
|
event=event,
|
|
117
117
|
application_name=self.__class__.__name__,
|
|
118
|
-
|
|
119
|
-
artifacts_logger=context,
|
|
118
|
+
context=context,
|
|
120
119
|
sample_df=sample_data,
|
|
121
120
|
feature_stats=feature_stats,
|
|
122
121
|
)
|
|
@@ -45,32 +45,6 @@ class _ArtifactsLogger(Protocol):
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
class MonitoringApplicationContext:
|
|
48
|
-
"""
|
|
49
|
-
The monitoring context holds all the relevant information for the monitoring application,
|
|
50
|
-
and also it can be used for logging artifacts and results.
|
|
51
|
-
The monitoring context has the following attributes:
|
|
52
|
-
|
|
53
|
-
:param application_name: (str) The model monitoring application name.
|
|
54
|
-
:param project_name: (str) The project name.
|
|
55
|
-
:param project: (MlrunProject) The project object.
|
|
56
|
-
:param logger: (mlrun.utils.Logger) MLRun logger.
|
|
57
|
-
:param nuclio_logger: (nuclio.request.Logger) Nuclio logger.
|
|
58
|
-
:param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
|
|
59
|
-
:param feature_stats: (FeatureStats) The train sample distribution dictionary.
|
|
60
|
-
:param sample_df: (pd.DataFrame) The new sample DataFrame.
|
|
61
|
-
:param start_infer_time: (pd.Timestamp) Start time of the monitoring schedule.
|
|
62
|
-
:param end_infer_time: (pd.Timestamp) End time of the monitoring schedule.
|
|
63
|
-
:param latest_request: (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
|
|
64
|
-
:param endpoint_id: (str) ID of the monitored model endpoint
|
|
65
|
-
:param endpoint_name: (str) Name of the monitored model endpoint
|
|
66
|
-
:param output_stream_uri: (str) URI of the output stream for results
|
|
67
|
-
:param model_endpoint: (ModelEndpoint) The model endpoint object.
|
|
68
|
-
:param feature_names: (list[str]) List of models feature names.
|
|
69
|
-
:param label_names: (list[str]) List of models label names.
|
|
70
|
-
:param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object,
|
|
71
|
-
and a list of extra data items.
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
48
|
_logger_name = "monitoring-application"
|
|
75
49
|
|
|
76
50
|
def __init__(
|
|
@@ -78,64 +52,51 @@ class MonitoringApplicationContext:
|
|
|
78
52
|
*,
|
|
79
53
|
application_name: str,
|
|
80
54
|
event: dict[str, Any],
|
|
55
|
+
project: "mlrun.MlrunProject",
|
|
56
|
+
artifacts_logger: _ArtifactsLogger,
|
|
57
|
+
logger: mlrun.utils.Logger,
|
|
58
|
+
nuclio_logger: nuclio.request.Logger,
|
|
81
59
|
model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
|
|
82
|
-
logger: Optional[mlrun.utils.Logger] = None,
|
|
83
|
-
graph_context: Optional[mlrun.serving.GraphContext] = None,
|
|
84
|
-
context: Optional["mlrun.MLClientCtx"] = None,
|
|
85
|
-
artifacts_logger: Optional[_ArtifactsLogger] = None,
|
|
86
60
|
sample_df: Optional[pd.DataFrame] = None,
|
|
87
61
|
feature_stats: Optional[FeatureStats] = None,
|
|
88
62
|
) -> None:
|
|
89
63
|
"""
|
|
90
|
-
The :code:`
|
|
91
|
-
and
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
:param application_name:
|
|
95
|
-
:param
|
|
96
|
-
:param
|
|
97
|
-
:param logger:
|
|
98
|
-
:param
|
|
99
|
-
:param
|
|
100
|
-
:param
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
:param
|
|
104
|
-
|
|
105
|
-
:param
|
|
106
|
-
|
|
64
|
+
The :code:`MonitoringApplicationContext` object holds all the relevant information for the
|
|
65
|
+
model monitoring application, and can be used for logging artifacts and messages.
|
|
66
|
+
The monitoring context has the following attributes:
|
|
67
|
+
|
|
68
|
+
:param application_name: (str) The model monitoring application name.
|
|
69
|
+
:param project: (:py:class:`~mlrun.projects.MlrunProject`) The current MLRun project object.
|
|
70
|
+
:param project_name: (str) The project name.
|
|
71
|
+
:param logger: (:py:class:`~mlrun.utils.Logger`) MLRun logger.
|
|
72
|
+
:param nuclio_logger: (nuclio.request.Logger) Nuclio logger.
|
|
73
|
+
:param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
|
|
74
|
+
:param feature_stats: (FeatureStats) The train sample distribution dictionary.
|
|
75
|
+
:param sample_df: (pd.DataFrame) The new sample DataFrame.
|
|
76
|
+
:param start_infer_time: (pd.Timestamp) Start time of the monitoring schedule.
|
|
77
|
+
:param end_infer_time: (pd.Timestamp) End time of the monitoring schedule.
|
|
78
|
+
:param latest_request: (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
|
|
79
|
+
:param endpoint_id: (str) ID of the monitored model endpoint
|
|
80
|
+
:param endpoint_name: (str) Name of the monitored model endpoint
|
|
81
|
+
:param output_stream_uri: (str) URI of the output stream for results
|
|
82
|
+
:param model_endpoint: (ModelEndpoint) The model endpoint object.
|
|
83
|
+
:param feature_names: (list[str]) List of models feature names.
|
|
84
|
+
:param label_names: (list[str]) List of models label names.
|
|
85
|
+
:param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object,
|
|
86
|
+
and a list of extra data items.
|
|
107
87
|
"""
|
|
108
88
|
self.application_name = application_name
|
|
109
89
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
self.project = mlrun.load_project(url=self.project_name)
|
|
113
|
-
elif context:
|
|
114
|
-
potential_project = context.get_project_object()
|
|
115
|
-
if not potential_project:
|
|
116
|
-
raise mlrun.errors.MLRunValueError(
|
|
117
|
-
"Could not load project from context"
|
|
118
|
-
)
|
|
119
|
-
self.project = potential_project
|
|
120
|
-
self.project_name = self.project.name
|
|
90
|
+
self.project = project
|
|
91
|
+
self.project_name = project.name
|
|
121
92
|
|
|
122
|
-
self._artifacts_logger
|
|
93
|
+
self._artifacts_logger = artifacts_logger
|
|
123
94
|
|
|
124
95
|
# MLRun Logger
|
|
125
|
-
self.logger = logger
|
|
126
|
-
level=mlrun.mlconf.log_level,
|
|
127
|
-
formatter_kind=mlrun.mlconf.log_formatter,
|
|
128
|
-
name=self._logger_name,
|
|
129
|
-
)
|
|
96
|
+
self.logger = logger
|
|
130
97
|
# Nuclio logger - `nuclio.request.Logger`.
|
|
131
98
|
# Note: this logger accepts keyword arguments only in its `_with` methods, e.g. `info_with`.
|
|
132
|
-
self.nuclio_logger =
|
|
133
|
-
graph_context.logger
|
|
134
|
-
if graph_context
|
|
135
|
-
else nuclio.request.Logger(
|
|
136
|
-
level=mlrun.mlconf.log_level, name=self._logger_name
|
|
137
|
-
)
|
|
138
|
-
)
|
|
99
|
+
self.nuclio_logger = nuclio_logger
|
|
139
100
|
|
|
140
101
|
# event data
|
|
141
102
|
self.start_infer_time = pd.Timestamp(
|
|
@@ -166,6 +127,68 @@ class MonitoringApplicationContext:
|
|
|
166
127
|
model_endpoint_dict.get(self.endpoint_id) if model_endpoint_dict else None
|
|
167
128
|
)
|
|
168
129
|
|
|
130
|
+
@classmethod
|
|
131
|
+
def _from_ml_ctx(
|
|
132
|
+
cls,
|
|
133
|
+
context: "mlrun.MLClientCtx",
|
|
134
|
+
*,
|
|
135
|
+
application_name: str,
|
|
136
|
+
event: dict[str, Any],
|
|
137
|
+
model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
|
|
138
|
+
sample_df: Optional[pd.DataFrame] = None,
|
|
139
|
+
feature_stats: Optional[FeatureStats] = None,
|
|
140
|
+
) -> "MonitoringApplicationContext":
|
|
141
|
+
project = context.get_project_object()
|
|
142
|
+
if not project:
|
|
143
|
+
raise mlrun.errors.MLRunValueError("Could not load project from context")
|
|
144
|
+
logger = context.logger
|
|
145
|
+
artifacts_logger = context
|
|
146
|
+
nuclio_logger = nuclio.request.Logger(
|
|
147
|
+
level=mlrun.mlconf.log_level, name=cls._logger_name
|
|
148
|
+
)
|
|
149
|
+
return cls(
|
|
150
|
+
application_name=application_name,
|
|
151
|
+
event=event,
|
|
152
|
+
model_endpoint_dict=model_endpoint_dict,
|
|
153
|
+
project=project,
|
|
154
|
+
logger=logger,
|
|
155
|
+
nuclio_logger=nuclio_logger,
|
|
156
|
+
artifacts_logger=artifacts_logger,
|
|
157
|
+
sample_df=sample_df,
|
|
158
|
+
feature_stats=feature_stats,
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
@classmethod
|
|
162
|
+
def _from_graph_ctx(
|
|
163
|
+
cls,
|
|
164
|
+
graph_context: mlrun.serving.GraphContext,
|
|
165
|
+
*,
|
|
166
|
+
application_name: str,
|
|
167
|
+
event: dict[str, Any],
|
|
168
|
+
model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
|
|
169
|
+
sample_df: Optional[pd.DataFrame] = None,
|
|
170
|
+
feature_stats: Optional[FeatureStats] = None,
|
|
171
|
+
) -> "MonitoringApplicationContext":
|
|
172
|
+
project = mlrun.load_project(url=graph_context.project)
|
|
173
|
+
nuclio_logger = graph_context.logger
|
|
174
|
+
artifacts_logger = project
|
|
175
|
+
logger = mlrun.utils.create_logger(
|
|
176
|
+
level=mlrun.mlconf.log_level,
|
|
177
|
+
formatter_kind=mlrun.mlconf.log_formatter,
|
|
178
|
+
name=cls._logger_name,
|
|
179
|
+
)
|
|
180
|
+
return cls(
|
|
181
|
+
application_name=application_name,
|
|
182
|
+
event=event,
|
|
183
|
+
project=project,
|
|
184
|
+
model_endpoint_dict=model_endpoint_dict,
|
|
185
|
+
logger=logger,
|
|
186
|
+
nuclio_logger=nuclio_logger,
|
|
187
|
+
artifacts_logger=artifacts_logger,
|
|
188
|
+
sample_df=sample_df,
|
|
189
|
+
feature_stats=feature_stats,
|
|
190
|
+
)
|
|
191
|
+
|
|
169
192
|
def _get_default_labels(self) -> dict[str, str]:
|
|
170
193
|
labels = {
|
|
171
194
|
mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
|
|
@@ -108,27 +108,6 @@ class TDEngineSchema:
|
|
|
108
108
|
)
|
|
109
109
|
return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({tags});"
|
|
110
110
|
|
|
111
|
-
@staticmethod
|
|
112
|
-
def _insert_subtable_stmt(
|
|
113
|
-
statement: taosws.TaosStmt,
|
|
114
|
-
columns: dict[str, _TDEngineColumn],
|
|
115
|
-
subtable: str,
|
|
116
|
-
values: dict[str, Union[str, int, float, datetime.datetime]],
|
|
117
|
-
) -> taosws.TaosStmt:
|
|
118
|
-
question_marks = ", ".join("?" * len(columns))
|
|
119
|
-
statement.prepare(f"INSERT INTO ? VALUES ({question_marks});")
|
|
120
|
-
statement.set_tbname(subtable)
|
|
121
|
-
|
|
122
|
-
bind_params = []
|
|
123
|
-
|
|
124
|
-
for col_name, col_type in columns.items():
|
|
125
|
-
val = values[col_name]
|
|
126
|
-
bind_params.append(values_to_column([val], col_type))
|
|
127
|
-
|
|
128
|
-
statement.bind_param(bind_params)
|
|
129
|
-
statement.add_batch()
|
|
130
|
-
return statement
|
|
131
|
-
|
|
132
111
|
def _delete_subtable_query(
|
|
133
112
|
self,
|
|
134
113
|
subtable: str,
|
|
@@ -26,7 +26,6 @@ import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
|
26
26
|
import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
|
|
27
27
|
import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
|
|
28
28
|
from mlrun.model_monitoring.db import TSDBConnector
|
|
29
|
-
from mlrun.model_monitoring.db.tsdb.tdengine.schemas import TDEngineSchema
|
|
30
29
|
from mlrun.model_monitoring.helpers import get_invocations_fqn
|
|
31
30
|
from mlrun.utils import logger
|
|
32
31
|
|
|
@@ -147,8 +146,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
147
146
|
create_table_sql = table._create_subtable_sql(subtable=table_name, values=event)
|
|
148
147
|
|
|
149
148
|
insert_statement = Statement(
|
|
150
|
-
|
|
151
|
-
|
|
149
|
+
columns=table.columns,
|
|
150
|
+
subtable=table_name,
|
|
151
|
+
values=event,
|
|
152
152
|
)
|
|
153
153
|
|
|
154
154
|
self.connection.run(
|
mlrun/projects/pipelines.py
CHANGED
|
@@ -471,6 +471,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
471
471
|
namespace=None,
|
|
472
472
|
source=None,
|
|
473
473
|
notifications: typing.Optional[list[mlrun.model.Notification]] = None,
|
|
474
|
+
context: typing.Optional[mlrun.execution.MLClientCtx] = None,
|
|
474
475
|
) -> _PipelineRunStatus:
|
|
475
476
|
pass
|
|
476
477
|
|
|
@@ -595,6 +596,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
595
596
|
namespace=None,
|
|
596
597
|
source=None,
|
|
597
598
|
notifications: typing.Optional[list[mlrun.model.Notification]] = None,
|
|
599
|
+
context: typing.Optional[mlrun.execution.MLClientCtx] = None,
|
|
598
600
|
) -> _PipelineRunStatus:
|
|
599
601
|
pipeline_context.set(project, workflow_spec)
|
|
600
602
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -644,12 +646,14 @@ class _KFPRunner(_PipelineRunner):
|
|
|
644
646
|
func_name=func.metadata.name,
|
|
645
647
|
exc_info=err_to_str(exc),
|
|
646
648
|
)
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
649
|
+
|
|
650
|
+
# TODO: we should check how we can get the run uid when we don't have the context (for example on
|
|
651
|
+
# mlrun.load_project() and later call directly to project.run)
|
|
652
|
+
if context:
|
|
653
|
+
project.notifiers.push_pipeline_start_message(
|
|
654
|
+
project.metadata.name,
|
|
655
|
+
context.uid,
|
|
656
|
+
)
|
|
653
657
|
pipeline_context.clear()
|
|
654
658
|
return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
|
|
655
659
|
|
|
@@ -722,6 +726,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
722
726
|
namespace=None,
|
|
723
727
|
source=None,
|
|
724
728
|
notifications: typing.Optional[list[mlrun.model.Notification]] = None,
|
|
729
|
+
context: typing.Optional[mlrun.execution.MLClientCtx] = None,
|
|
725
730
|
) -> _PipelineRunStatus:
|
|
726
731
|
pipeline_context.set(project, workflow_spec)
|
|
727
732
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -805,6 +810,7 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
805
810
|
namespace: typing.Optional[str] = None,
|
|
806
811
|
source: typing.Optional[str] = None,
|
|
807
812
|
notifications: typing.Optional[list[mlrun.model.Notification]] = None,
|
|
813
|
+
context: typing.Optional[mlrun.execution.MLClientCtx] = None,
|
|
808
814
|
) -> typing.Optional[_PipelineRunStatus]:
|
|
809
815
|
workflow_name = normalize_workflow_name(name=name, project_name=project.name)
|
|
810
816
|
workflow_id = None
|
|
@@ -1127,6 +1133,7 @@ def load_and_run_workflow(
|
|
|
1127
1133
|
engine=engine,
|
|
1128
1134
|
local=local,
|
|
1129
1135
|
notifications=start_notifications,
|
|
1136
|
+
context=context,
|
|
1130
1137
|
)
|
|
1131
1138
|
context.log_result(key="workflow_id", value=run.run_id)
|
|
1132
1139
|
context.log_result(key="engine", value=run._engine.engine, commit=True)
|
mlrun/projects/project.py
CHANGED
|
@@ -1873,6 +1873,34 @@ class MlrunProject(ModelObj):
|
|
|
1873
1873
|
vector_store: "VectorStore", # noqa: F821
|
|
1874
1874
|
collection_name: Optional[str] = None,
|
|
1875
1875
|
) -> VectorStoreCollection:
|
|
1876
|
+
"""
|
|
1877
|
+
Create a VectorStoreCollection wrapper for a given vector store instance.
|
|
1878
|
+
|
|
1879
|
+
This method wraps a vector store implementation (like Milvus, Chroma) with MLRun
|
|
1880
|
+
integration capabilities. The wrapper provides access to the underlying vector
|
|
1881
|
+
store's functionality while adding MLRun-specific features like document and
|
|
1882
|
+
artifact management.
|
|
1883
|
+
|
|
1884
|
+
Args:
|
|
1885
|
+
vector_store: The vector store instance to wrap (e.g., Milvus, Chroma).
|
|
1886
|
+
This is the underlying implementation that will handle
|
|
1887
|
+
vector storage and retrieval.
|
|
1888
|
+
collection_name: Optional name for the collection. If not provided,
|
|
1889
|
+
will attempt to extract it from the vector_store object
|
|
1890
|
+
by looking for 'collection_name', '_collection_name',
|
|
1891
|
+
'index_name', or '_index_name' attributes.
|
|
1892
|
+
|
|
1893
|
+
Returns:
|
|
1894
|
+
VectorStoreCollection: A wrapped vector store instance with MLRun integration.
|
|
1895
|
+
This wrapper provides both access to the original vector
|
|
1896
|
+
store's capabilities and additional MLRun functionality.
|
|
1897
|
+
|
|
1898
|
+
Example:
|
|
1899
|
+
>>> vector_store = Chroma(embedding_function=embeddings)
|
|
1900
|
+
>>> collection = project.get_vector_store_collection(
|
|
1901
|
+
... vector_store, collection_name="my_collection"
|
|
1902
|
+
... )
|
|
1903
|
+
"""
|
|
1876
1904
|
return VectorStoreCollection(
|
|
1877
1905
|
self,
|
|
1878
1906
|
vector_store,
|
|
@@ -1899,12 +1927,39 @@ class MlrunProject(ModelObj):
|
|
|
1899
1927
|
:param local_path: path to the local file we upload, will also be used
|
|
1900
1928
|
as the destination subpath (under "artifact_path")
|
|
1901
1929
|
:param artifact_path: Target path for artifact storage
|
|
1902
|
-
:param document_loader_spec: Spec to use to load the artifact as langchain document
|
|
1930
|
+
:param document_loader_spec: Spec to use to load the artifact as langchain document.
|
|
1931
|
+
|
|
1932
|
+
By default, uses DocumentLoaderSpec() which initializes with:
|
|
1933
|
+
|
|
1934
|
+
* loader_class_name="langchain_community.document_loaders.TextLoader"
|
|
1935
|
+
* src_name="file_path"
|
|
1936
|
+
* kwargs=None
|
|
1937
|
+
|
|
1938
|
+
Can be customized for different document types, e.g.::
|
|
1939
|
+
|
|
1940
|
+
DocumentLoaderSpec(
|
|
1941
|
+
loader_class_name="langchain_community.document_loaders.PDFLoader",
|
|
1942
|
+
src_name="file_path",
|
|
1943
|
+
kwargs={"extract_images": True}
|
|
1944
|
+
)
|
|
1903
1945
|
:param upload: Whether to upload the artifact
|
|
1904
1946
|
:param labels: Key-value labels
|
|
1905
1947
|
:param target_path: Target file path
|
|
1906
1948
|
:param kwargs: Additional keyword arguments
|
|
1907
1949
|
:return: DocumentArtifact object
|
|
1950
|
+
|
|
1951
|
+
Example:
|
|
1952
|
+
>>> # Log a PDF document with custom loader
|
|
1953
|
+
>>> project.log_document(
|
|
1954
|
+
... key="my_doc",
|
|
1955
|
+
... local_path="path/to/doc.pdf",
|
|
1956
|
+
... document_loader_spec=DocumentLoaderSpec(
|
|
1957
|
+
... loader_class_name="langchain_community.document_loaders.PDFLoader",
|
|
1958
|
+
... src_name="file_path",
|
|
1959
|
+
... kwargs={"extract_images": True},
|
|
1960
|
+
... ),
|
|
1961
|
+
... )
|
|
1962
|
+
|
|
1908
1963
|
"""
|
|
1909
1964
|
doc_artifact = DocumentArtifact(
|
|
1910
1965
|
key=key,
|
|
@@ -2586,6 +2641,24 @@ class MlrunProject(ModelObj):
|
|
|
2586
2641
|
self._set_function(resolved_function_name, tag, function_object, func)
|
|
2587
2642
|
return function_object
|
|
2588
2643
|
|
|
2644
|
+
def push_run_notifications(
|
|
2645
|
+
self,
|
|
2646
|
+
uid,
|
|
2647
|
+
timeout=45,
|
|
2648
|
+
):
|
|
2649
|
+
"""
|
|
2650
|
+
Push notifications for a run.
|
|
2651
|
+
|
|
2652
|
+
:param uid: Unique ID of the run.
|
|
2653
|
+
:returns: :py:class:`~mlrun.common.schemas.BackgroundTask`.
|
|
2654
|
+
"""
|
|
2655
|
+
db = mlrun.db.get_run_db(secrets=self._secrets)
|
|
2656
|
+
return db.push_run_notifications(
|
|
2657
|
+
project=self.name,
|
|
2658
|
+
uid=uid,
|
|
2659
|
+
timeout=timeout,
|
|
2660
|
+
)
|
|
2661
|
+
|
|
2589
2662
|
def _instantiate_function(
|
|
2590
2663
|
self,
|
|
2591
2664
|
func: typing.Union[str, mlrun.runtimes.BaseRuntime] = None,
|
|
@@ -3239,6 +3312,7 @@ class MlrunProject(ModelObj):
|
|
|
3239
3312
|
cleanup_ttl: Optional[int] = None,
|
|
3240
3313
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
3241
3314
|
workflow_runner_node_selector: typing.Optional[dict[str, str]] = None,
|
|
3315
|
+
context: typing.Optional[mlrun.execution.MLClientCtx] = None,
|
|
3242
3316
|
) -> _PipelineRunStatus:
|
|
3243
3317
|
"""Run a workflow using kubeflow pipelines
|
|
3244
3318
|
|
|
@@ -3281,6 +3355,7 @@ class MlrunProject(ModelObj):
|
|
|
3281
3355
|
This allows you to control and specify where the workflow runner pod will be scheduled.
|
|
3282
3356
|
This setting is only relevant when the engine is set to 'remote' or for scheduled workflows,
|
|
3283
3357
|
and it will be ignored if the workflow is not run on a remote engine.
|
|
3358
|
+
:param context: mlrun context.
|
|
3284
3359
|
:returns: :py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
|
|
3285
3360
|
"""
|
|
3286
3361
|
|
|
@@ -3367,6 +3442,7 @@ class MlrunProject(ModelObj):
|
|
|
3367
3442
|
namespace=namespace,
|
|
3368
3443
|
source=source,
|
|
3369
3444
|
notifications=notifications,
|
|
3445
|
+
context=context,
|
|
3370
3446
|
)
|
|
3371
3447
|
# run is None when scheduling
|
|
3372
3448
|
if run and run.state == mlrun_pipelines.common.models.RunStatuses.failed:
|
|
@@ -4443,7 +4519,9 @@ class MlrunProject(ModelObj):
|
|
|
4443
4519
|
last_update_time_to: Optional[datetime.datetime] = None,
|
|
4444
4520
|
**kwargs,
|
|
4445
4521
|
) -> mlrun.lists.RunList:
|
|
4446
|
-
"""Retrieve a list of runs
|
|
4522
|
+
"""Retrieve a list of runs.
|
|
4523
|
+
The default returns the runs from the last week, partitioned by name.
|
|
4524
|
+
To override the default, specify any filter.
|
|
4447
4525
|
|
|
4448
4526
|
The returned result is a `RunList` (list of dict), use `.to_objects()` to convert it to a list of RunObjects,
|
|
4449
4527
|
`.show()` to view graphically in Jupyter, `.to_df()` to convert to a DataFrame, and `compare()` to
|
|
@@ -1036,9 +1036,10 @@ class RemoteRuntime(KubeResource):
|
|
|
1036
1036
|
if args and sidecar.get("command"):
|
|
1037
1037
|
sidecar["args"] = mlrun.utils.helpers.as_list(args)
|
|
1038
1038
|
|
|
1039
|
-
#
|
|
1039
|
+
# put the configured resources on the sidecar container instead of the reverse proxy container
|
|
1040
1040
|
if self.spec.resources:
|
|
1041
1041
|
sidecar["resources"] = self.spec.resources
|
|
1042
|
+
self.spec.resources = None
|
|
1042
1043
|
|
|
1043
1044
|
def _set_sidecar(self, name: str) -> dict:
|
|
1044
1045
|
self.spec.config.setdefault("spec.sidecars", [])
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -387,11 +387,16 @@ class ServingRuntime(RemoteRuntime):
|
|
|
387
387
|
:param router_step: router step name (to determine which router we add the model to in graphs
|
|
388
388
|
with multiple router steps)
|
|
389
389
|
:param child_function: child function name, when the model runs in a child function
|
|
390
|
-
:param creation_strategy:
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
390
|
+
:param creation_strategy: Strategy for creating or updating the model endpoint:
|
|
391
|
+
* **overwrite**:
|
|
392
|
+
1. If model endpoints with the same name exist, delete the `latest` one.
|
|
393
|
+
2. Create a new model endpoint entry and set it as `latest`.
|
|
394
|
+
* **inplace** (default):
|
|
395
|
+
1. If model endpoints with the same name exist, update the `latest` entry.
|
|
396
|
+
2. Otherwise, create a new entry.
|
|
397
|
+
* **archive**:
|
|
398
|
+
1. If model endpoints with the same name exist, preserve them.
|
|
399
|
+
2. Create a new model endpoint with the same name and set it to `latest`.
|
|
395
400
|
:param class_args: extra kwargs to pass to the model serving class __init__
|
|
396
401
|
(can be read in the model using .get_param(key) method)
|
|
397
402
|
"""
|
mlrun/serving/routers.py
CHANGED
|
@@ -619,7 +619,10 @@ class VotingEnsemble(ParallelRun):
|
|
|
619
619
|
|
|
620
620
|
if not self.context.is_mock or self.context.monitoring_mock:
|
|
621
621
|
self.model_endpoint_uid = _init_endpoint_record(
|
|
622
|
-
server,
|
|
622
|
+
server,
|
|
623
|
+
self,
|
|
624
|
+
creation_strategy=kwargs.get("creation_strategy"),
|
|
625
|
+
endpoint_type=kwargs.get("endpoint_type"),
|
|
623
626
|
)
|
|
624
627
|
|
|
625
628
|
self._update_weights(self.weights)
|
|
@@ -1004,7 +1007,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
1004
1007
|
def _init_endpoint_record(
|
|
1005
1008
|
graph_server: GraphServer,
|
|
1006
1009
|
voting_ensemble: VotingEnsemble,
|
|
1007
|
-
creation_strategy:
|
|
1010
|
+
creation_strategy: mlrun.common.schemas.ModelEndpointCreationStrategy,
|
|
1008
1011
|
endpoint_type: mlrun.common.schemas.EndpointType,
|
|
1009
1012
|
) -> Union[str, None]:
|
|
1010
1013
|
"""
|
|
@@ -1015,11 +1018,17 @@ def _init_endpoint_record(
|
|
|
1015
1018
|
:param graph_server: A GraphServer object which will be used for getting the function uri.
|
|
1016
1019
|
:param voting_ensemble: Voting ensemble serving class. It contains important details for the model endpoint record
|
|
1017
1020
|
such as model name, model path, model version, and the ids of the children model endpoints.
|
|
1018
|
-
:param creation_strategy:
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1021
|
+
:param creation_strategy: Strategy for creating or updating the model endpoint:
|
|
1022
|
+
* **overwrite**:
|
|
1023
|
+
1. If model endpoints with the same name exist, delete the `latest` one.
|
|
1024
|
+
2. Create a new model endpoint entry and set it as `latest`.
|
|
1025
|
+
* **inplace** (default):
|
|
1026
|
+
1. If model endpoints with the same name exist, update the `latest` entry.
|
|
1027
|
+
2. Otherwise, create a new entry.
|
|
1028
|
+
* **archive**:
|
|
1029
|
+
1. If model endpoints with the same name exist, preserve them.
|
|
1030
|
+
2. Create a new model endpoint with the same name and set it to `latest`.
|
|
1031
|
+
|
|
1023
1032
|
:param endpoint_type: model endpoint type
|
|
1024
1033
|
:return: Model endpoint unique ID.
|
|
1025
1034
|
"""
|