mlrun 1.8.0rc27__py3-none-any.whl → 1.8.0rc28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/document.py +9 -6
- mlrun/common/schemas/serving.py +22 -0
- mlrun/config.py +9 -0
- mlrun/datastore/base.py +0 -7
- mlrun/datastore/s3.py +9 -2
- mlrun/db/base.py +0 -1
- mlrun/db/httpdb.py +5 -10
- mlrun/db/nopdb.py +0 -1
- mlrun/execution.py +15 -4
- mlrun/model_monitoring/applications/_application_steps.py +1 -0
- mlrun/model_monitoring/applications/base.py +132 -21
- mlrun/model_monitoring/applications/context.py +2 -3
- mlrun/model_monitoring/controller.py +117 -57
- mlrun/model_monitoring/db/_schedules.py +8 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +12 -5
- mlrun/model_monitoring/stream_processing.py +3 -2
- mlrun/projects/project.py +38 -7
- mlrun/runtimes/base.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/nuclio/function.py +37 -0
- mlrun/runtimes/nuclio/serving.py +3 -0
- mlrun/runtimes/pod.py +1 -3
- mlrun/serving/routers.py +62 -17
- mlrun/serving/server.py +11 -0
- mlrun/serving/states.py +0 -4
- mlrun/serving/v2_serving.py +45 -10
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/METADATA +4 -2
- {mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/RECORD +33 -32
- {mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/top_level.txt +0 -0
mlrun/artifacts/document.py
CHANGED
|
@@ -34,7 +34,9 @@ class DocumentLoaderSpec(ModelObj):
|
|
|
34
34
|
|
|
35
35
|
This class is responsible for loading documents from a given source path using a specified loader class.
|
|
36
36
|
The loader class is dynamically imported and instantiated with the provided arguments. The loaded documents
|
|
37
|
-
can be optionally uploaded as artifacts.
|
|
37
|
+
can be optionally uploaded as artifacts. Note that only loader classes that return single results
|
|
38
|
+
(e.g., TextLoader, UnstructuredHTMLLoader, WebBaseLoader(scalar)) are supported - loaders returning multiple
|
|
39
|
+
results like DirectoryLoader or WebBaseLoader(list) are not compatible.
|
|
38
40
|
|
|
39
41
|
Attributes:
|
|
40
42
|
loader_class_name (str): The name of the loader class to use for loading documents.
|
|
@@ -61,7 +63,7 @@ class DocumentLoaderSpec(ModelObj):
|
|
|
61
63
|
kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.
|
|
62
64
|
download_object (bool, optional): If True, the file will be downloaded before launching
|
|
63
65
|
the loader. If False, the loader accepts a link that should not be downloaded.
|
|
64
|
-
Defaults to
|
|
66
|
+
Defaults to True.
|
|
65
67
|
Example:
|
|
66
68
|
>>> # Create a loader specification for PDF documents
|
|
67
69
|
>>> loader_spec = DocumentLoaderSpec(
|
|
@@ -97,7 +99,7 @@ class MLRunLoader:
|
|
|
97
99
|
Args:
|
|
98
100
|
artifact_key (str, optional): The key for the artifact to be logged. Special characters and symbols
|
|
99
101
|
not valid in artifact names will be encoded as their hexadecimal representation. The '%%' pattern
|
|
100
|
-
in the key will be replaced by the hex-encoded version of the source path. Defaults to "
|
|
102
|
+
in the key will be replaced by the hex-encoded version of the source path. Defaults to "%%".
|
|
101
103
|
local_path (str): The source path of the document to be loaded.
|
|
102
104
|
loader_spec (DocumentLoaderSpec): Specification for the document loader.
|
|
103
105
|
producer (Optional[Union[MlrunProject, str, MLClientCtx]], optional): The producer of the document.
|
|
@@ -129,7 +131,7 @@ class MLRunLoader:
|
|
|
129
131
|
>>> loader = MLRunLoader(
|
|
130
132
|
... source_path="/path/to/document.txt",
|
|
131
133
|
... loader_spec=loader_spec,
|
|
132
|
-
... artifact_key="
|
|
134
|
+
... artifact_key="%%", # %% will be replaced with encoded path
|
|
133
135
|
... producer=project,
|
|
134
136
|
... )
|
|
135
137
|
>>> documents = loader.load()
|
|
@@ -141,7 +143,7 @@ class MLRunLoader:
|
|
|
141
143
|
... loader_cls=MLRunLoader,
|
|
142
144
|
... loader_kwargs={
|
|
143
145
|
... "loader_spec": loader_spec,
|
|
144
|
-
... "artifact_key": "
|
|
146
|
+
... "artifact_key": "%%",
|
|
145
147
|
... "producer": project,
|
|
146
148
|
... "upload": True,
|
|
147
149
|
... },
|
|
@@ -154,7 +156,7 @@ class MLRunLoader:
|
|
|
154
156
|
cls,
|
|
155
157
|
source_path: str,
|
|
156
158
|
loader_spec: "DocumentLoaderSpec",
|
|
157
|
-
artifact_key="
|
|
159
|
+
artifact_key="%%",
|
|
158
160
|
producer: Optional[Union["MlrunProject", str, "MLClientCtx"]] = None, # noqa: F821
|
|
159
161
|
upload: bool = False,
|
|
160
162
|
tag: str = "",
|
|
@@ -271,6 +273,7 @@ class DocumentArtifact(Artifact):
|
|
|
271
273
|
result.append("_")
|
|
272
274
|
|
|
273
275
|
resolved_path = "".join(result)
|
|
276
|
+
resolved_path = resolved_path.lstrip("_")
|
|
274
277
|
return resolved_path
|
|
275
278
|
|
|
276
279
|
class DocumentArtifactSpec(ArtifactSpec):
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from pydantic.v1 import BaseModel
|
|
16
|
+
|
|
17
|
+
from .background_task import BackgroundTaskList
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DeployResponse(BaseModel):
|
|
21
|
+
data: dict
|
|
22
|
+
background_tasks: BackgroundTaskList
|
mlrun/config.py
CHANGED
|
@@ -232,6 +232,7 @@ default_config = {
|
|
|
232
232
|
"abort_grace_period": "10",
|
|
233
233
|
"delete_project": "900",
|
|
234
234
|
"delete_function": "900",
|
|
235
|
+
"model_endpoint_creation": "600",
|
|
235
236
|
},
|
|
236
237
|
"runtimes": {"dask": "600"},
|
|
237
238
|
"push_notifications": "60",
|
|
@@ -870,6 +871,14 @@ class Config:
|
|
|
870
871
|
return self.__class__(val)
|
|
871
872
|
return val
|
|
872
873
|
|
|
874
|
+
def __deepcopy__(self, memo):
|
|
875
|
+
cls = self.__class__
|
|
876
|
+
# create a new Config without calling __init__ (avoid recursion)
|
|
877
|
+
result = cls.__new__(cls)
|
|
878
|
+
# manually deep-copy _cfg
|
|
879
|
+
object.__setattr__(result, "_cfg", copy.deepcopy(self._cfg, memo))
|
|
880
|
+
return result
|
|
881
|
+
|
|
873
882
|
def __setattr__(self, attr, value):
|
|
874
883
|
# in order for the dbpath setter to work
|
|
875
884
|
if attr == "dbpath":
|
mlrun/datastore/base.py
CHANGED
|
@@ -677,13 +677,6 @@ class DataItem:
|
|
|
677
677
|
return f"'{self.url}'"
|
|
678
678
|
|
|
679
679
|
|
|
680
|
-
def get_range(size, offset):
|
|
681
|
-
byterange = f"bytes={offset}-"
|
|
682
|
-
if size:
|
|
683
|
-
byterange += str(offset + size)
|
|
684
|
-
return byterange
|
|
685
|
-
|
|
686
|
-
|
|
687
680
|
def basic_auth_header(user, password):
|
|
688
681
|
username = user.encode("latin1")
|
|
689
682
|
password = password.encode("latin1")
|
mlrun/datastore/s3.py
CHANGED
|
@@ -21,7 +21,7 @@ from fsspec.registry import get_filesystem_class
|
|
|
21
21
|
|
|
22
22
|
import mlrun.errors
|
|
23
23
|
|
|
24
|
-
from .base import DataStore, FileStats,
|
|
24
|
+
from .base import DataStore, FileStats, make_datastore_schema_sanitizer
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class S3Store(DataStore):
|
|
@@ -108,6 +108,13 @@ class S3Store(DataStore):
|
|
|
108
108
|
"choose-signer.s3.*", disable_signing
|
|
109
109
|
)
|
|
110
110
|
|
|
111
|
+
@staticmethod
|
|
112
|
+
def get_range(size, offset):
|
|
113
|
+
byterange = f"bytes={offset}-"
|
|
114
|
+
if size:
|
|
115
|
+
byterange += str(offset + size - 1)
|
|
116
|
+
return byterange
|
|
117
|
+
|
|
111
118
|
def get_spark_options(self):
|
|
112
119
|
res = {}
|
|
113
120
|
st = self.get_storage_options()
|
|
@@ -185,7 +192,7 @@ class S3Store(DataStore):
|
|
|
185
192
|
bucket, key = self.get_bucket_and_key(key)
|
|
186
193
|
obj = self.s3.Object(bucket, key)
|
|
187
194
|
if size or offset:
|
|
188
|
-
return obj.get(Range=get_range(size, offset))["Body"].read()
|
|
195
|
+
return obj.get(Range=S3Store.get_range(size, offset))["Body"].read()
|
|
189
196
|
return obj.get()["Body"].read()
|
|
190
197
|
|
|
191
198
|
def put(self, key, data, append=False):
|
mlrun/db/base.py
CHANGED
|
@@ -1077,7 +1077,6 @@ class RunDBInterface(ABC):
|
|
|
1077
1077
|
base_period: int = 10,
|
|
1078
1078
|
image: str = "mlrun/mlrun",
|
|
1079
1079
|
deploy_histogram_data_drift_app: bool = True,
|
|
1080
|
-
rebuild_images: bool = False,
|
|
1081
1080
|
fetch_credentials_from_sys_config: bool = False,
|
|
1082
1081
|
) -> None:
|
|
1083
1082
|
pass
|
mlrun/db/httpdb.py
CHANGED
|
@@ -2374,9 +2374,9 @@ class HTTPRunDB(RunDBInterface):
|
|
|
2374
2374
|
def retry_pipeline(
|
|
2375
2375
|
self,
|
|
2376
2376
|
run_id: str,
|
|
2377
|
+
project: str,
|
|
2377
2378
|
namespace: Optional[str] = None,
|
|
2378
2379
|
timeout: int = 30,
|
|
2379
|
-
project: Optional[str] = None,
|
|
2380
2380
|
):
|
|
2381
2381
|
"""
|
|
2382
2382
|
Retry a specific pipeline run using its run ID. This function sends an API request
|
|
@@ -2386,8 +2386,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
2386
2386
|
:param run_id: The unique ID of the pipeline run to retry.
|
|
2387
2387
|
:param namespace: Kubernetes namespace where the pipeline is running. Optional.
|
|
2388
2388
|
:param timeout: Timeout (in seconds) for the API call. Defaults to 30 seconds.
|
|
2389
|
-
:param project: Name of the MLRun project associated with the pipeline.
|
|
2390
|
-
``*`` to query across all projects. Optional.
|
|
2389
|
+
:param project: Name of the MLRun project associated with the pipeline.
|
|
2391
2390
|
|
|
2392
2391
|
:raises ValueError: Raised if the API response is not successful or contains an
|
|
2393
2392
|
error.
|
|
@@ -2398,14 +2397,13 @@ class HTTPRunDB(RunDBInterface):
|
|
|
2398
2397
|
params = {}
|
|
2399
2398
|
if namespace:
|
|
2400
2399
|
params["namespace"] = namespace
|
|
2401
|
-
project_path = project if project else "*"
|
|
2402
2400
|
|
|
2403
2401
|
resp_text = ""
|
|
2404
2402
|
resp_code = None
|
|
2405
2403
|
try:
|
|
2406
2404
|
resp = self.api_call(
|
|
2407
2405
|
"POST",
|
|
2408
|
-
f"projects/{
|
|
2406
|
+
f"projects/{project}/pipelines/{run_id}/retry",
|
|
2409
2407
|
params=params,
|
|
2410
2408
|
timeout=timeout,
|
|
2411
2409
|
)
|
|
@@ -2420,7 +2418,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
2420
2418
|
logger.error(
|
|
2421
2419
|
"Retry pipeline API call encountered an error.",
|
|
2422
2420
|
run_id=run_id,
|
|
2423
|
-
project=
|
|
2421
|
+
project=project,
|
|
2424
2422
|
namespace=namespace,
|
|
2425
2423
|
response_code=resp_code,
|
|
2426
2424
|
response_text=resp_text,
|
|
@@ -2435,7 +2433,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
2435
2433
|
logger.info(
|
|
2436
2434
|
"Successfully retried pipeline run",
|
|
2437
2435
|
run_id=run_id,
|
|
2438
|
-
project=
|
|
2436
|
+
project=project,
|
|
2439
2437
|
namespace=namespace,
|
|
2440
2438
|
)
|
|
2441
2439
|
return resp.json()
|
|
@@ -3973,7 +3971,6 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3973
3971
|
base_period: int = 10,
|
|
3974
3972
|
image: str = "mlrun/mlrun",
|
|
3975
3973
|
deploy_histogram_data_drift_app: bool = True,
|
|
3976
|
-
rebuild_images: bool = False,
|
|
3977
3974
|
fetch_credentials_from_sys_config: bool = False,
|
|
3978
3975
|
) -> None:
|
|
3979
3976
|
"""
|
|
@@ -3991,7 +3988,6 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3991
3988
|
stream functions, which are real time nuclio functions.
|
|
3992
3989
|
By default, the image is mlrun/mlrun.
|
|
3993
3990
|
:param deploy_histogram_data_drift_app: If true, deploy the default histogram-based data drift application.
|
|
3994
|
-
:param rebuild_images: If true, force rebuild of model monitoring infrastructure images.
|
|
3995
3991
|
:param fetch_credentials_from_sys_config: If true, fetch the credentials from the system configuration.
|
|
3996
3992
|
|
|
3997
3993
|
"""
|
|
@@ -4002,7 +3998,6 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4002
3998
|
"base_period": base_period,
|
|
4003
3999
|
"image": image,
|
|
4004
4000
|
"deploy_histogram_data_drift_app": deploy_histogram_data_drift_app,
|
|
4005
|
-
"rebuild_images": rebuild_images,
|
|
4006
4001
|
"fetch_credentials_from_sys_config": fetch_credentials_from_sys_config,
|
|
4007
4002
|
},
|
|
4008
4003
|
)
|
mlrun/db/nopdb.py
CHANGED
mlrun/execution.py
CHANGED
|
@@ -914,7 +914,8 @@ class MLClientCtx:
|
|
|
914
914
|
kwargs={"extract_images": True}
|
|
915
915
|
)
|
|
916
916
|
:param upload: Whether to upload the artifact
|
|
917
|
-
:param labels:
|
|
917
|
+
:param labels: Key-value labels. A 'source' label is automatically added using either
|
|
918
|
+
local_path or target_path to facilitate easier document searching.
|
|
918
919
|
:param target_path: Path to the local file
|
|
919
920
|
:param db_key: The key to use in the artifact DB table, by default its run name + '_' + key
|
|
920
921
|
db_key=False will not register it in the artifacts table
|
|
@@ -932,22 +933,32 @@ class MLClientCtx:
|
|
|
932
933
|
... ),
|
|
933
934
|
... )
|
|
934
935
|
"""
|
|
936
|
+
original_source = local_path or target_path
|
|
935
937
|
|
|
936
|
-
if not key and not
|
|
938
|
+
if not key and not original_source:
|
|
937
939
|
raise ValueError(
|
|
938
940
|
"Must provide either 'key' parameter or 'local_path'/'target_path' to derive the key from"
|
|
939
941
|
)
|
|
940
942
|
if not key:
|
|
941
|
-
key = DocumentArtifact.key_from_source(
|
|
943
|
+
key = DocumentArtifact.key_from_source(original_source)
|
|
942
944
|
|
|
943
945
|
doc_artifact = DocumentArtifact(
|
|
944
946
|
key=key,
|
|
945
|
-
original_source=
|
|
947
|
+
original_source=original_source,
|
|
946
948
|
document_loader_spec=document_loader_spec,
|
|
947
949
|
collections=kwargs.pop("collections", None),
|
|
948
950
|
**kwargs,
|
|
949
951
|
)
|
|
950
952
|
|
|
953
|
+
# limit label to a max of 255 characters (for db reasons)
|
|
954
|
+
max_length = 255
|
|
955
|
+
labels = labels or {}
|
|
956
|
+
labels["source"] = (
|
|
957
|
+
original_source[: max_length - 3] + "..."
|
|
958
|
+
if len(original_source) > max_length
|
|
959
|
+
else original_source
|
|
960
|
+
)
|
|
961
|
+
|
|
951
962
|
item = self._artifacts_manager.log_artifact(
|
|
952
963
|
self,
|
|
953
964
|
doc_artifact,
|
|
@@ -126,6 +126,7 @@ class _PrepareMonitoringEvent(StepToDict):
|
|
|
126
126
|
:param application_name: Application name.
|
|
127
127
|
"""
|
|
128
128
|
self.graph_context = context
|
|
129
|
+
_ = self.graph_context.project_obj # Ensure project exists
|
|
129
130
|
self.application_name = application_name
|
|
130
131
|
self.model_endpoints: dict[str, mlrun.common.schemas.ModelEndpoint] = {}
|
|
131
132
|
|
|
@@ -215,6 +215,116 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
215
215
|
)
|
|
216
216
|
function.deploy()
|
|
217
217
|
|
|
218
|
+
@classmethod
|
|
219
|
+
def get_job_handler(cls, handler_to_class: str) -> str:
|
|
220
|
+
"""
|
|
221
|
+
A helper function to get the handler to the application job ``_handler``.
|
|
222
|
+
|
|
223
|
+
:param handler_to_class: The handler to the application class, e.g. ``my_package.sub_module1.MonitoringApp1``.
|
|
224
|
+
:returns: The handler to the job of the application class.
|
|
225
|
+
"""
|
|
226
|
+
return f"{handler_to_class}::{cls._handler.__name__}"
|
|
227
|
+
|
|
228
|
+
@classmethod
|
|
229
|
+
def to_job(
|
|
230
|
+
cls,
|
|
231
|
+
*,
|
|
232
|
+
class_handler: Optional[str] = None,
|
|
233
|
+
func_path: Optional[str] = None,
|
|
234
|
+
func_name: Optional[str] = None,
|
|
235
|
+
tag: Optional[str] = None,
|
|
236
|
+
image: Optional[str] = None,
|
|
237
|
+
with_repo: Optional[bool] = False,
|
|
238
|
+
requirements: Optional[Union[str, list[str]]] = None,
|
|
239
|
+
requirements_file: str = "",
|
|
240
|
+
project: Optional["mlrun.MlrunProject"] = None,
|
|
241
|
+
) -> mlrun.runtimes.KubejobRuntime:
|
|
242
|
+
"""
|
|
243
|
+
Get the application's :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
|
|
244
|
+
model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`.
|
|
245
|
+
|
|
246
|
+
The returned job can be run as any MLRun job with the relevant inputs and params to your application:
|
|
247
|
+
|
|
248
|
+
.. code-block:: python
|
|
249
|
+
|
|
250
|
+
job = ModelMonitoringApplicationBase.to_job(
|
|
251
|
+
class_handler="package.module.AppClass"
|
|
252
|
+
)
|
|
253
|
+
job.run(inputs={}, params={}, local=False) # Add the relevant inputs and params
|
|
254
|
+
|
|
255
|
+
Optional inputs:
|
|
256
|
+
|
|
257
|
+
* ``sample_data``, ``pd.DataFrame``
|
|
258
|
+
* ``reference_data``, ``pd.DataFrame``
|
|
259
|
+
|
|
260
|
+
Optional params:
|
|
261
|
+
|
|
262
|
+
* ``endpoints``, ``list[tuple[str, str]]``
|
|
263
|
+
* ``start``, ``datetime``
|
|
264
|
+
* ``end``, ``datetime``
|
|
265
|
+
* ``base_period``, ``int``
|
|
266
|
+
|
|
267
|
+
For Git sources, add the source archive to the returned job and change the handler:
|
|
268
|
+
|
|
269
|
+
.. code-block:: python
|
|
270
|
+
|
|
271
|
+
handler = ModelMonitoringApplicationBase.get_job_handler("module.AppClass")
|
|
272
|
+
job.with_source_archive(
|
|
273
|
+
"git://github.com/owner/repo.git#branch-category/specific-task",
|
|
274
|
+
workdir="path/to/application/folder",
|
|
275
|
+
handler=handler,
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
:param class_handler: The handler to the class, e.g. ``path.to.module::MonitoringApplication``,
|
|
279
|
+
useful when using Git sources or code from images.
|
|
280
|
+
If ``None``, the current class, deriving from
|
|
281
|
+
:py:class:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase`,
|
|
282
|
+
is used.
|
|
283
|
+
:param func_path: The path to the function. If ``None``, the current notebook is used.
|
|
284
|
+
:param func_name:         The name of the function. If ``None``, the class name is used.
|
|
285
|
+
:param tag: Tag for the function.
|
|
286
|
+
:param image: Docker image to run the job on (when running remotely).
|
|
287
|
+
:param with_repo: Whether to clone the current repo to the build source.
|
|
288
|
+
:param requirements: List of Python requirements to be installed in the image.
|
|
289
|
+
:param requirements_file: Path to a Python requirements file to be installed in the image.
|
|
290
|
+
:param project: The current project to set the function to. If not set, the current project is used.
|
|
291
|
+
|
|
292
|
+
:returns: The :py:class:`~mlrun.runtimes.KubejobRuntime` job that wraps the model monitoring application's
|
|
293
|
+
logic.
|
|
294
|
+
"""
|
|
295
|
+
project = project or cast("mlrun.MlrunProject", mlrun.get_current_project())
|
|
296
|
+
|
|
297
|
+
if not class_handler and cls == ModelMonitoringApplicationBase:
|
|
298
|
+
raise ValueError(
|
|
299
|
+
"You must provide a handler to the model monitoring application class"
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
handler_to_class = class_handler or cls.__name__
|
|
303
|
+
handler = cls.get_job_handler(handler_to_class)
|
|
304
|
+
|
|
305
|
+
if not class_handler:
|
|
306
|
+
class_name = cls.__name__
|
|
307
|
+
else:
|
|
308
|
+
class_name = handler_to_class.split(".")[-1].split("::")[-1]
|
|
309
|
+
|
|
310
|
+
job_name = func_name if func_name else class_name
|
|
311
|
+
|
|
312
|
+
job = cast(
|
|
313
|
+
mlrun.runtimes.KubejobRuntime,
|
|
314
|
+
project.set_function(
|
|
315
|
+
func=func_path,
|
|
316
|
+
name=job_name,
|
|
317
|
+
kind=mlrun.runtimes.KubejobRuntime.kind,
|
|
318
|
+
handler=handler,
|
|
319
|
+
tag=tag,
|
|
320
|
+
image=image,
|
|
321
|
+
with_repo=with_repo,
|
|
322
|
+
requirements=requirements,
|
|
323
|
+
requirements_file=requirements_file,
|
|
324
|
+
),
|
|
325
|
+
)
|
|
326
|
+
return job
|
|
327
|
+
|
|
218
328
|
@classmethod
|
|
219
329
|
def evaluate(
|
|
220
330
|
cls,
|
|
@@ -223,10 +333,12 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
223
333
|
*,
|
|
224
334
|
tag: Optional[str] = None,
|
|
225
335
|
run_local: bool = True,
|
|
336
|
+
auto_build: bool = True,
|
|
226
337
|
sample_data: Optional[pd.DataFrame] = None,
|
|
227
338
|
reference_data: Optional[pd.DataFrame] = None,
|
|
228
339
|
image: Optional[str] = None,
|
|
229
340
|
with_repo: Optional[bool] = False,
|
|
341
|
+
class_handler: Optional[str] = None,
|
|
230
342
|
requirements: Optional[Union[str, list[str]]] = None,
|
|
231
343
|
requirements_file: str = "",
|
|
232
344
|
endpoints: Optional[list[tuple[str, str]]] = None,
|
|
@@ -239,19 +351,21 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
239
351
|
:py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
|
|
240
352
|
model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`, which is an MLRun function.
|
|
241
353
|
|
|
242
|
-
This
|
|
354
|
+
This function has default values for all of its arguments. You should change them when you want to pass
|
|
243
355
|
data to the application.
|
|
244
356
|
|
|
245
357
|
:param func_path: The path to the function. If ``None``, the current notebook is used.
|
|
246
358
|
:param func_name:         The name of the function. If ``None``, the class name is used.
|
|
247
359
|
:param tag: Tag for the function.
|
|
248
360
|
:param run_local: Whether to run the function locally or remotely.
|
|
361
|
+
:param auto_build: Whether to auto build the function.
|
|
249
362
|
:param sample_data: Pandas data-frame as the current dataset.
|
|
250
363
|
When set, it replaces the data read from the model endpoint's offline source.
|
|
251
364
|
:param reference_data: Pandas data-frame of the reference dataset.
|
|
252
365
|
When set, its statistics override the model endpoint's feature statistics.
|
|
253
|
-
:param image: Docker image to run the job on.
|
|
366
|
+
:param image: Docker image to run the job on (when running remotely).
|
|
254
367
|
:param with_repo: Whether to clone the current repo to the build source.
|
|
368
|
+
:param class_handler: The relative path to the class, useful when using Git sources or code from images.
|
|
255
369
|
:param requirements: List of Python requirements to be installed in the image.
|
|
256
370
|
:param requirements_file: Path to a Python requirements file to be installed in the image.
|
|
257
371
|
:param endpoints: A list of tuples of the model endpoint (name, uid) to get the data from.
|
|
@@ -268,23 +382,17 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
268
382
|
method with the given parameters and inputs, wrapped in a :py:class:`~mlrun.model.RunObject`.
|
|
269
383
|
"""
|
|
270
384
|
project = cast("mlrun.MlrunProject", mlrun.get_current_project())
|
|
271
|
-
class_name = cls.__name__
|
|
272
|
-
job_name = func_name if func_name is not None else class_name
|
|
273
|
-
handler = f"{class_name}::{cls._handler.__name__}"
|
|
274
385
|
|
|
275
|
-
job =
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
requirements=requirements,
|
|
286
|
-
requirements_file=requirements_file,
|
|
287
|
-
),
|
|
386
|
+
job = cls.to_job(
|
|
387
|
+
func_path=func_path,
|
|
388
|
+
func_name=func_name,
|
|
389
|
+
class_handler=class_handler,
|
|
390
|
+
tag=tag,
|
|
391
|
+
image=image,
|
|
392
|
+
with_repo=with_repo,
|
|
393
|
+
requirements=requirements,
|
|
394
|
+
requirements_file=requirements_file,
|
|
395
|
+
project=project,
|
|
288
396
|
)
|
|
289
397
|
|
|
290
398
|
params: dict[str, Union[list[tuple[str, str]], datetime, int, None]] = {}
|
|
@@ -305,18 +413,21 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
305
413
|
(reference_data, "reference_data"),
|
|
306
414
|
]:
|
|
307
415
|
if data is not None:
|
|
308
|
-
key = f"{
|
|
416
|
+
key = f"{job.metadata.name}_{identifier}"
|
|
309
417
|
inputs[identifier] = project.log_dataset(
|
|
310
418
|
key,
|
|
311
419
|
data,
|
|
312
420
|
labels={
|
|
313
421
|
mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
|
|
314
422
|
mlrun_constants.MLRunInternalLabels.producer_type: "model-monitoring-job",
|
|
315
|
-
mlrun_constants.MLRunInternalLabels.app_name:
|
|
423
|
+
mlrun_constants.MLRunInternalLabels.app_name: func_name
|
|
424
|
+
or cls.__name__,
|
|
316
425
|
},
|
|
317
426
|
).uri
|
|
318
427
|
|
|
319
|
-
run_result = job.run(
|
|
428
|
+
run_result = job.run(
|
|
429
|
+
local=run_local, auto_build=auto_build, params=params, inputs=inputs
|
|
430
|
+
)
|
|
320
431
|
return run_result
|
|
321
432
|
|
|
322
433
|
@abstractmethod
|
|
@@ -169,9 +169,8 @@ class MonitoringApplicationContext:
|
|
|
169
169
|
sample_df: Optional[pd.DataFrame] = None,
|
|
170
170
|
feature_stats: Optional[FeatureStats] = None,
|
|
171
171
|
) -> "MonitoringApplicationContext":
|
|
172
|
-
project = mlrun.load_project(url=graph_context.project)
|
|
173
172
|
nuclio_logger = graph_context.logger
|
|
174
|
-
artifacts_logger =
|
|
173
|
+
artifacts_logger = graph_context.project_obj
|
|
175
174
|
logger = mlrun.utils.create_logger(
|
|
176
175
|
level=mlrun.mlconf.log_level,
|
|
177
176
|
formatter_kind=mlrun.mlconf.log_formatter,
|
|
@@ -180,7 +179,7 @@ class MonitoringApplicationContext:
|
|
|
180
179
|
return cls(
|
|
181
180
|
application_name=application_name,
|
|
182
181
|
event=event,
|
|
183
|
-
project=
|
|
182
|
+
project=graph_context.project_obj,
|
|
184
183
|
model_endpoint_dict=model_endpoint_dict,
|
|
185
184
|
logger=logger,
|
|
186
185
|
nuclio_logger=nuclio_logger,
|