mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +37 -3
- mlrun/__main__.py +5 -0
- mlrun/alerts/alert.py +1 -0
- mlrun/artifacts/document.py +78 -36
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/runtimes/constants.py +17 -0
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/model_monitoring/constants.py +32 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/config.py +39 -6
- mlrun/datastore/datastore_profile.py +58 -16
- mlrun/datastore/sources.py +7 -1
- mlrun/datastore/vectorstore.py +20 -1
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +97 -10
- mlrun/db/nopdb.py +19 -0
- mlrun/errors.py +4 -0
- mlrun/execution.py +15 -6
- mlrun/frameworks/_common/model_handler.py +0 -2
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +5 -1
- mlrun/model_monitoring/applications/_application_steps.py +3 -1
- mlrun/model_monitoring/controller.py +266 -103
- mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +20 -21
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -34
- mlrun/model_monitoring/helpers.py +16 -10
- mlrun/model_monitoring/stream_processing.py +106 -35
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packagers_manager.py +4 -18
- mlrun/projects/pipelines.py +18 -5
- mlrun/projects/project.py +156 -39
- mlrun/runtimes/nuclio/serving.py +22 -13
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/secrets.py +1 -1
- mlrun/serving/server.py +11 -3
- mlrun/serving/states.py +65 -8
- mlrun/serving/v2_serving.py +67 -44
- mlrun/utils/helpers.py +111 -23
- mlrun/utils/notifications/notification/base.py +6 -1
- mlrun/utils/notifications/notification/slack.py +5 -1
- mlrun/utils/notifications/notification_pusher.py +67 -36
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +52 -52
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py
CHANGED
@@ -29,6 +29,7 @@ import zipfile
 from copy import deepcopy
 from os import environ, makedirs, path
 from typing import Callable, Optional, Union, cast
+from urllib.parse import urlparse

 import dotenv
 import git
@@ -83,6 +84,7 @@ from ..artifacts import (
     ModelArtifact,
 )
 from ..artifacts.manager import ArtifactManager, dict_to_artifact, extend_artifact_path
+from ..common.runtimes.constants import RunStates
 from ..datastore import store_manager
 from ..features import Feature
 from ..model import EntrypointParam, ImageBuilder, ModelObj
@@ -851,6 +853,7 @@ class ProjectSpec(ModelObj):
         build=None,
         custom_packagers: Optional[list[tuple[str, bool]]] = None,
         default_function_node_selector=None,
+        notifications=None,
     ):
         self.repo = None

@@ -891,6 +894,7 @@
         # whether it is mandatory for a run (raise exception on collection error) or not.
         self.custom_packagers = custom_packagers or []
         self._default_function_node_selector = default_function_node_selector or None
+        self.notifications = notifications or []

     @property
     def source(self) -> str:
@@ -1172,7 +1176,6 @@ class MlrunProject(ModelObj):
         self._artifact_manager = None
         self._notifiers = CustomNotificationPusher(
             [
-                NotificationTypes.slack,
                 NotificationTypes.console,
                 NotificationTypes.ipython,
             ]
@@ -1910,7 +1913,7 @@

     def log_document(
         self,
-        key: str,
+        key: str = "",
         tag: str = "",
         local_path: str = "",
         artifact_path: Optional[str] = None,
@@ -1923,7 +1926,8 @@
         """
         Log a document as an artifact.

-        :param key:
+        :param key: Optional artifact key. If not provided, will be derived from local_path
+            or target_path using DocumentArtifact.key_from_source()
         :param tag: Version tag
         :param local_path: path to the local file we upload, will also be use
             as the destination subpath (under "artifact_path")
@@ -1952,7 +1956,6 @@
         Example:
         >>> # Log a PDF document with custom loader
         >>> project.log_document(
-        ...     key="my_doc",
         ...     local_path="path/to/doc.pdf",
         ...     document_loader=DocumentLoaderSpec(
         ...         loader_class_name="langchain_community.document_loaders.PDFLoader",
@@ -1962,17 +1965,24 @@
         ...     )

         """
+        if not key and not local_path and not target_path:
+            raise ValueError(
+                "Must provide either 'key' parameter or 'local_path'/'target_path' to derive the key from"
+            )
+        if not key:
+            key = DocumentArtifact.key_from_source(local_path or target_path)
+
+        document_loader_spec = document_loader_spec or DocumentLoaderSpec()
         if not document_loader_spec.download_object and upload:
             raise ValueError(
-                "
-                "Either set download_object=True or set upload=False"
+                "The document loader is configured to not support downloads but the upload flag is set to True."
+                "Either set loader.download_object=True or set upload=False"
             )
         doc_artifact = DocumentArtifact(
             key=key,
             original_source=local_path or target_path,
-            document_loader_spec=document_loader_spec
-
-            else DocumentLoaderSpec(),
+            document_loader_spec=document_loader_spec,
+            collections=kwargs.pop("collections", None),
             **kwargs,
         )
         return self.log_artifact(
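For illustration, a minimal sketch of the new optional key behavior shown in this hunk (the project name and file path are placeholders; `log_document` derives the key via `DocumentArtifact.key_from_source` when it is omitted):

    import mlrun

    project = mlrun.get_or_create_project("docs-demo", context="./")

    # key is optional now: it is derived from local_path (or target_path)
    # via DocumentArtifact.key_from_source() when not provided
    artifact = project.log_document(local_path="path/to/doc.pdf")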
@@ -2116,8 +2126,9 @@
         """
         :param name: AlertConfig name.
         :param summary: Summary of the alert, will be sent in the generated notifications
-        :param endpoints: The endpoints from which
-
+        :param endpoints: The endpoints from which metrics will be retrieved to configure the alerts.
+            This `ModelEndpointList` object obtained via the `list_model_endpoints`
+            method or created manually using `ModelEndpoint` objects.
         :param events: AlertTrigger event types (EventKind).
         :param notifications: List of notifications to invoke once the alert is triggered
         :param result_names: Optional. Filters the result names used to create the alert configuration,
@@ -2126,6 +2137,8 @@
             For example:
             [`app1.result-*`, `*.result1`]
             will match "mep1.app1.result.result-1" and "mep1.app2.result.result1".
+            A specific result_name (not a wildcard) will always create a new alert
+            config, regardless of whether the result name exists.
         :param severity: Severity of the alert.
         :param criteria: When the alert will be triggered based on the
             specified number of events within the defined time period.
@@ -2136,6 +2149,11 @@
         """
         db = mlrun.db.get_run_db(secrets=self._secrets)
         matching_results = []
+        specific_result_names = [
+            result_name
+            for result_name in result_names
+            if result_name.count(".") == 3 and "*" not in result_name
+        ]
         alerts = []
         endpoint_ids = [endpoint.metadata.uid for endpoint in endpoints.endpoints]
         # using separation to group by endpoint IDs:
@@ -2159,7 +2177,14 @@
             existing_result_names=results_fqn_by_endpoint,
             result_name_filters=result_names,
         )
-        for
+        for specific_result_name in specific_result_names:
+            if specific_result_name not in matching_results:
+                logger.warning(
+                    f"The specific result name '{specific_result_name}' was"
+                    f" not found in the existing endpoint results. Adding alert configuration anyway."
+                )
+        alert_result_names = list(set(specific_result_names + matching_results))
+        for result_fqn in alert_result_names:
             alerts.append(
                 mlrun.alerts.alert.AlertConfig(
                     project=self.name,
@@ -2670,6 +2695,36 @@
             timeout=timeout,
         )

+    def push_pipeline_notification_kfp_runner(
+        self,
+        pipeline_id: str,
+        current_run_state: mlrun_pipelines.common.models.RunStatuses,
+        message: str,
+        notifications: Optional[list] = None,
+    ):
+        """
+        Push notifications for a pipeline run(KFP).
+
+        :param pipeline_id: Unique ID of the pipeline run.
+        :param current_run_state: Current run state of the pipeline.
+        :param message: Message to send in the notification.
+        :param notifications: List of notifications to send.
+        """
+        current_run_state = RunStates.pipeline_run_status_to_run_state(
+            current_run_state
+        )
+        db = mlrun.get_run_db()
+        notifications = notifications or self.spec.notifications
+        notifications_to_send = []
+        for notification in notifications:
+            if current_run_state in notification.when:
+                notification_copy = notification.copy()
+                notification_copy.message = message
+                notifications_to_send.append(notification_copy)
+        db.push_pipeline_notifications(
+            pipeline_id, self.metadata.name, notifications_to_send
+        )
+
     def _instantiate_function(
         self,
         func: typing.Union[str, mlrun.runtimes.BaseRuntime] = None,
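A hedged sketch of calling the new notification method above (the pipeline id, project name, and the `RunStatuses.succeeded` member are illustrative assumptions; only the method signature is taken from the diff):

    import mlrun
    import mlrun_pipelines.common.models as kfp_models

    project = mlrun.get_or_create_project("pipelines-demo", context="./")
    project.push_pipeline_notification_kfp_runner(
        pipeline_id="some-kfp-run-id",                       # placeholder
        current_run_state=kfp_models.RunStatuses.succeeded,  # assumed enum member
        message="Pipeline finished",
    )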
@@ -3576,9 +3631,12 @@ class MlrunProject(ModelObj):
     def set_model_monitoring_credentials(
         self,
         access_key: Optional[str] = None,
-        stream_path: Optional[str] = None,
-        tsdb_connection: Optional[str] = None,
+        stream_path: Optional[str] = None,  # Deprecated
+        tsdb_connection: Optional[str] = None,  # Deprecated
         replace_creds: bool = False,
+        *,
+        stream_profile_name: Optional[str] = None,
+        tsdb_profile_name: Optional[str] = None,
     ):
         """
         Set the credentials that will be used by the project's model monitoring
@@ -3590,50 +3648,109 @@
             * None - will be set from the system configuration.
             * v3io - for v3io endpoint store, pass `v3io` and the system will generate the
               exact path.
-        :param stream_path:
-
-
-            * v3io - for v3io stream, pass
-              path.
-            * Kafka - for Kafka stream, provide the full connection string without
-              topic, for example kafka://<some_kafka_broker>:<port
-        :param tsdb_connection: Connection string to the time series database. By default, None.
+        :param stream_path: (Deprecated) This argument is deprecated. Use ``stream_profile_name`` instead.
+            Path to the model monitoring stream. By default, None. Options:
+
+            * ``"v3io"`` - for v3io stream, pass ``"v3io"`` and the system will generate
+              the exact path.
+            * Kafka - for Kafka stream, provide the full connection string without acustom
+              topic, for example ``"kafka://<some_kafka_broker>:<port>"``.
+        :param tsdb_connection: (Deprecated) Connection string to the time series database. By default, None.
             Options:

-            *
-
-              path.
+            * v3io - for v3io stream, pass ``"v3io"`` and the system will generate the
+              exact path.
             * TDEngine - for TDEngine tsdb, provide the full websocket connection URL,
-              for example taosws://<username>:<password>@<host>:<port
+              for example ``"taosws://<username>:<password>@<host>:<port>"``.
         :param replace_creds: If True, will override the existing credentials.
             Please keep in mind that if you already enabled model monitoring on
            your project this action can cause data loose and will require redeploying
            all model monitoring functions & model monitoring infra
            & tracked model server.
+        :param stream_profile_name: The datastore profile name of the stream to be used in model monitoring.
+            The supported profiles are:
+
+            * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
+            * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource`
+
+            You need to register one of them, and pass the profile's name.
+        :param tsdb_profile_name: The datastore profile name of the time-series database to be used in model
+            monitoring. The supported profiles are:
+
+            * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
+            * :py:class:`~mlrun.datastore.datastore_profile.TDEngineDatastoreProfile`
+
+            You need to register one of them, and pass the profile's name.
         """
         db = mlrun.db.get_run_db(secrets=self._secrets)
-
-
-
+
+        if tsdb_connection:
+            warnings.warn(
+                "The `tsdb_connection` argument is deprecated and will be removed in MLRun version 1.8.0. "
+                "Use `tsdb_profile_name` instead.",
+                FutureWarning,
             )
+            if tsdb_profile_name:
+                raise mlrun.errors.MLRunValueError(
+                    "If you set `tsdb_profile_name`, you must not pass `tsdb_connection`."
+                )
+            if tsdb_connection == "v3io":
+                tsdb_profile = mlrun.datastore.datastore_profile.DatastoreProfileV3io(
+                    name=mm_constants.DefaultProfileName.TSDB
+                )
+            else:
+                parsed_url = urlparse(tsdb_connection)
+                if parsed_url.scheme != "taosws":
+                    raise mlrun.errors.MLRunValueError(
+                        f"Unsupported `tsdb_connection`: '{tsdb_connection}'."
+                    )
+                tsdb_profile = (
+                    mlrun.datastore.datastore_profile.TDEngineDatastoreProfile(
+                        name=mm_constants.DefaultProfileName.TSDB,
+                        user=parsed_url.username,
+                        password=parsed_url.password,
+                        host=parsed_url.hostname,
+                        port=parsed_url.port,
+                    )
+                )
+
             self.register_datastore_profile(tsdb_profile)
             tsdb_profile_name = tsdb_profile.name
-
-
-
-
-
+
+        if stream_path:
+            warnings.warn(
+                "The `stream_path` argument is deprecated and will be removed in MLRun version 1.8.0. "
+                "Use `stream_profile_name` instead.",
+                FutureWarning,
             )
+            if stream_profile_name:
+                raise mlrun.errors.MLRunValueError(
+                    "If you set `stream_profile_name`, you must not pass `stream_path`."
+                )
+            if stream_path == "v3io":
+                stream_profile = mlrun.datastore.datastore_profile.DatastoreProfileV3io(
+                    name=mm_constants.DefaultProfileName.STREAM
+                )
+            else:
+                parsed_stream = urlparse(stream_path)
+                if parsed_stream.scheme != "kafka":
+                    raise mlrun.errors.MLRunValueError(
+                        f"Unsupported `stream_path`: '{stream_path}'."
+                    )
+                stream_profile = (
+                    mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource(
+                        name=mm_constants.DefaultProfileName.STREAM,
+                        brokers=[parsed_stream.netloc],
+                        topics=[],
+                    )
+                )
             self.register_datastore_profile(stream_profile)
             stream_profile_name = stream_profile.name
-
-            stream_profile_name = None
+
         db.set_model_monitoring_credentials(
             project=self.name,
             credentials={
                 "access_key": access_key,
-                "stream_path": stream_path,
-                "tsdb_connection": tsdb_connection,
                 "tsdb_profile_name": tsdb_profile_name,
                 "stream_profile_name": stream_profile_name,
             },
@@ -3644,7 +3761,7 @@
             "Model monitoring credentials were set successfully. "
             "Please keep in mind that if you already had model monitoring functions "
             "/ model monitoring infra / tracked model server "
-            "deployed on your project, you will need to redeploy them."
+            "deployed on your project, you will need to redeploy them. "
             "For redeploying the model monitoring infra, please use `enable_model_monitoring` API "
             "and set `rebuild_images=True`"
         )
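For illustration, a minimal sketch of the profile-based flow that replaces the deprecated `stream_path`/`tsdb_connection` arguments (profile names and connection details are placeholders; the profile classes and keyword arguments are the ones referenced in the diff above):

    import mlrun
    from mlrun.datastore.datastore_profile import (
        DatastoreProfileKafkaSource,
        TDEngineDatastoreProfile,
    )

    project = mlrun.get_or_create_project("monitoring-demo", context="./")

    # register the profiles, then reference them by name
    project.register_datastore_profile(
        TDEngineDatastoreProfile(
            name="mm-tsdb", user="root", password="taosdata", host="tdengine", port=6041
        )
    )
    project.register_datastore_profile(
        DatastoreProfileKafkaSource(name="mm-stream", brokers=["kafka:9092"], topics=[])
    )

    project.set_model_monitoring_credentials(
        tsdb_profile_name="mm-tsdb",
        stream_profile_name="mm-stream",
    )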
mlrun/runtimes/nuclio/serving.py
CHANGED
@@ -309,7 +309,7 @@ class ServingRuntime(RemoteRuntime):
         self,
         stream_path: Optional[str] = None,
         batch: Optional[int] = None,
-
+        sampling_percentage: float = 100,
         stream_args: Optional[dict] = None,
         tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
         enable_tracking: bool = True,
@@ -317,13 +317,13 @@ class ServingRuntime(RemoteRuntime):
         """Apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
         and analyze performance.

-        :param stream_path:
-
-        :param batch:
-        :param
-
-        :param
-
+        :param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
+            you can use the "dummy://" path for test/simulation.
+        :param batch: Deprecated. Micro batch size (send micro batches of N records at a time).
+        :param sampling_percentage: Down sampling events that will be pushed to the monitoring stream based on
+            a specified percentage. e.g. 50 for 50%. By default, all events are pushed.
+        :param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
+        :param enable_tracking: Enabled/Disable model-monitoring tracking. Default True (tracking enabled).

         Example::

@@ -336,12 +336,21 @@ class ServingRuntime(RemoteRuntime):
         # Applying model monitoring configurations
         self.spec.track_models = enable_tracking

+        if not 0 < sampling_percentage <= 100:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "`sampling_percentage` must be greater than 0 and less or equal to 100."
+            )
+        self.spec.parameters["sampling_percentage"] = sampling_percentage
+
         if stream_path:
             self.spec.parameters["log_stream"] = stream_path
         if batch:
-
-
-
+            warnings.warn(
+                "The `batch` size parameter was deprecated in version 1.8.0 and is no longer used. "
+                "It will be removed in 1.10.",
+                # TODO: Remove this in 1.10
+                FutureWarning,
+            )
         if stream_args:
             self.spec.parameters["stream_args"] = stream_args
         if tracking_policy is not None:
@@ -679,7 +688,7 @@ class ServingRuntime(RemoteRuntime):
             "project": self.metadata.project,
             "version": "v2",
             "parameters": self.spec.parameters,
-            "graph": self.spec.graph.to_dict() if self.spec.graph else {},
+            "graph": self.spec.graph.to_dict(strip=True) if self.spec.graph else {},
             "load_mode": self.spec.load_mode,
             "functions": function_name_uri_map,
             "graph_initializer": self.spec.graph_initializer,
@@ -747,7 +756,7 @@ class ServingRuntime(RemoteRuntime):
             namespace=namespace,
             logger=logger,
             is_mock=True,
-            monitoring_mock=track_models,
+            monitoring_mock=self.spec.track_models,
         )

         if workdir:
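A hedged usage sketch of the new `sampling_percentage` argument (the serving function is illustrative, and the method shown in the hunks above is assumed to be `ServingRuntime.set_tracking`):

    import mlrun

    serving_fn = mlrun.code_to_function(
        "my-serving", filename="serving.py", kind="serving", image="mlrun/mlrun"
    )
    # push roughly half of the events to the monitoring stream;
    # values outside (0, 100] raise MLRunInvalidArgumentError
    serving_fn.set_tracking(sampling_percentage=50)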
mlrun/runtimes/sparkjob/spark3job.py
CHANGED
@@ -789,7 +789,7 @@ class Spark3Runtime(KubejobRuntime):
         Spark itself uses the spec.[executor|driver].cores parameter to set the parallelism of tasks and cores
         assigned to each task within the pod. This function sets the .cores parameters for the job executed.

-        See https://github.com/
+        See https://github.com/kubeflow/spark-operator/issues/581 for a discussion about those
         parameters and their meaning in Spark operator.

         :param executor_cores: Number of cores to use for executor (spark.executor.cores)
mlrun/secrets.py
CHANGED
@@ -134,7 +134,7 @@ class SecretsStore:
     def k8s_env_variable_name_for_secret(secret_name):
         from mlrun.config import config

-        return config.secret_stores.kubernetes.env_variable_prefix + secret_name
+        return config.secret_stores.kubernetes.env_variable_prefix + secret_name

     def get_k8s_secrets(self):
         for source in self._hidden_sources:
mlrun/serving/server.py
CHANGED
@@ -44,6 +44,8 @@ from ..utils import get_caller_globals
 from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
 from .utils import event_id_key, event_path_key

+DUMMY_STREAM = "dummy://"
+

 class _StreamContext:
     """Handles the stream context for the events stream process. Includes the configuration for the output stream
@@ -72,14 +74,20 @@ class _StreamContext:
                 function_uri, config.default_project
             )

-
+            stream_args = parameters.get("stream_args", {})
+
+            if log_stream == DUMMY_STREAM:
+                # Dummy stream used for testing, see tests/serving/test_serving.py
+                self.stream_uri = DUMMY_STREAM
+            elif not stream_args.get("mock"):  # if not a mock: `context.is_mock = True`
+                self.stream_uri = mlrun.model_monitoring.get_stream_path(
+                    project=project
+                )

             if log_stream:
                 # Update the stream path to the log stream value
                 self.stream_uri = log_stream.format(project=project)

-            stream_args = parameters.get("stream_args", {})
-
             self.output_stream = get_stream_pusher(self.stream_uri, **stream_args)

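A hedged sketch of routing monitoring events to the dummy test stream handled above (the function definition is illustrative; `set_tracking` on the serving runtime is assumed to be the method that sets `log_stream`):

    import mlrun

    serving_fn = mlrun.new_function("tester", kind="serving", image="mlrun/mlrun")
    # "dummy://" makes _StreamContext use the in-process dummy stream (test/simulation)
    serving_fn.set_tracking(stream_path="dummy://")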
mlrun/serving/states.py
CHANGED
@@ -31,6 +31,7 @@ import storey.utils

 import mlrun
 import mlrun.common.schemas as schemas
+from mlrun.utils import logger

 from ..config import config
 from ..datastore import get_stream_pusher
@@ -49,6 +50,8 @@ path_splitter = "/"
 previous_step = "$prev"
 queue_class_names = [">>", "$queue"]

+MAX_MODELS_PER_ROUTER = 5000
+

 class GraphError(Exception):
     """error in graph topology or configuration"""
@@ -86,8 +89,10 @@ _task_step_fields = [
     "endpoint_type",
 ]

-
-
+_default_fields_to_strip_from_step = [
+    "model_endpoint_creation_strategy",
+    "endpoint_type",
+]


 def new_remote_endpoint(
@@ -110,6 +115,7 @@ class BaseStep(ModelObj):
     kind = "BaseStep"
     default_shape = "ellipse"
     _dict_fields = ["kind", "comment", "after", "on_error"]
+    _default_fields_to_strip = _default_fields_to_strip_from_step

     def __init__(
         self,
@@ -319,6 +325,9 @@ class BaseStep(ModelObj):
         full_event: Optional[bool] = None,
         input_path: Optional[str] = None,
         result_path: Optional[str] = None,
+        model_endpoint_creation_strategy: Optional[
+            schemas.ModelEndpointCreationStrategy
+        ] = None,
         **class_args,
     ):
         """add a step right after this step and return the new step
@@ -346,6 +355,16 @@
             this require that the event body will behave like a dict, example:
             event: {"x": 5} , result_path="y" means the output of the step will be written
             to event["y"] resulting in {"x": 5, "y": <result>}
+        :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
+            * **overwrite**:
+              1. If model endpoints with the same name exist, delete the `latest` one.
+              2. Create a new model endpoint entry and set it as `latest`.
+            * **inplace** (default):
+              1. If model endpoints with the same name exist, update the `latest` entry.
+              2. Otherwise, create a new entry.
+            * **archive**:
+              1. If model endpoints with the same name exist, preserve them.
+              2. Create a new model endpoint with the same name and set it to `latest`.
         :param class_args: class init arguments
         """
         if hasattr(self, "steps"):
@@ -367,6 +386,7 @@
             input_path=input_path,
             result_path=result_path,
             class_args=class_args,
+            model_endpoint_creation_strategy=model_endpoint_creation_strategy,
         )
         step = parent._steps.update(name, step)
         step.set_parent(parent)
@@ -625,6 +645,19 @@ class TaskStep(BaseStep):
                 raise exc
         return event

+    def to_dict(
+        self,
+        fields: Optional[list] = None,
+        exclude: Optional[list] = None,
+        strip: bool = False,
+    ) -> dict:
+        self.endpoint_type = (
+            self.endpoint_type.value
+            if isinstance(self.endpoint_type, schemas.EndpointType)
+            else self.endpoint_type
+        )
+        return super().to_dict(fields, exclude, strip)
+

 class MonitoringApplicationStep(TaskStep):
     """monitoring application execution step, runs users class code"""
@@ -755,7 +788,7 @@ class RouterStep(TaskStep):
         creation_strategy: schemas.ModelEndpointCreationStrategy = schemas.ModelEndpointCreationStrategy.INPLACE,
         **class_args,
     ):
-        """add child route step or class to the router
+        """add child route step or class to the router, if key exists it will be updated

         :param key: unique name (and route path) for the child step
         :param route: child step object (Task, ..)
@@ -775,7 +808,13 @@
               2. Create a new model endpoint with the same name and set it to `latest`.

         """
-
+        if len(self.routes.keys()) >= MAX_MODELS_PER_ROUTER and key not in self.routes:
+            raise mlrun.errors.MLRunModelLimitExceededError(
+                f"Router cannot support more than {MAX_MODELS_PER_ROUTER} model endpoints. "
+                f"To add a new route, edit an existing one by passing the same key."
+            )
+        if key in self.routes:
+            logger.info(f"Model {key} already exists, updating it.")
         if not route and not class_name and not handler:
             raise MLRunInvalidArgumentError("route or class_name must be specified")
         if not route:
@@ -790,10 +829,6 @@
             )
         route.function = function or route.function

-        if len(self._routes) >= MAX_ALLOWED_STEPS:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                f"Cannot create the serving graph: the maximum number of steps is {MAX_ALLOWED_STEPS}"
-            )
         route = self._routes.update(key, route)
         route.set_parent(self)
         return route
@@ -806,6 +841,10 @@
         del self._routes[key]

     def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
+        if not self.routes:
+            raise mlrun.errors.MLRunRuntimeError(
+                "You have to add models to the router step before initializing it"
+            )
         if not self._is_local_function(context):
             return

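A hedged sketch of the updated router behavior (the model class names and paths are placeholders; `add_model` is assumed to go through the router's route-adding method shown above):

    import mlrun

    serving_fn = mlrun.new_function("router-demo", kind="serving", image="mlrun/mlrun")
    serving_fn.set_topology("router")

    # adding the same key twice now updates the existing route
    # (logged as "Model mymodel already exists, updating it.") instead of failing
    serving_fn.add_model("mymodel", class_name="MyModel", model_path="./model.pkl")
    serving_fn.add_model("mymodel", class_name="MyModelV2", model_path="./model_v2.pkl")
    # a router is capped at MAX_MODELS_PER_ROUTER (5000) distinct keys;
    # exceeding it raises MLRunModelLimitExceededError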
@@ -1010,6 +1049,9 @@ class QueueStep(BaseStep):
         full_event: Optional[bool] = None,
         input_path: Optional[str] = None,
         result_path: Optional[str] = None,
+        model_endpoint_creation_strategy: Optional[
+            schemas.ModelEndpointCreationStrategy
+        ] = None,
         **class_args,
     ):
         if not function:
@@ -1026,6 +1068,7 @@
             full_event,
             input_path,
             result_path,
+            model_endpoint_creation_strategy,
             **class_args,
         )

@@ -1104,6 +1147,9 @@ class FlowStep(BaseStep):
         full_event: Optional[bool] = None,
         input_path: Optional[str] = None,
         result_path: Optional[str] = None,
+        model_endpoint_creation_strategy: Optional[
+            schemas.ModelEndpointCreationStrategy
+        ] = None,
         **class_args,
     ):
         """add task, queue or router step/class to the flow
@@ -1135,6 +1181,16 @@
             this require that the event body will behave like a dict, example:
             event: {"x": 5} , result_path="y" means the output of the step will be written
             to event["y"] resulting in {"x": 5, "y": <result>}
+        :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
+            * **overwrite**:
+              1. If model endpoints with the same name exist, delete the `latest` one.
+              2. Create a new model endpoint entry and set it as `latest`.
+            * **inplace** (default):
+              1. If model endpoints with the same name exist, update the `latest` entry.
+              2. Otherwise, create a new entry.
+            * **archive**:
+              1. If model endpoints with the same name exist, preserve them.
+              2. Create a new model endpoint with the same name and set it to `latest`.
         :param class_args: class init arguments
         """

@@ -1147,6 +1203,7 @@
             full_event=full_event,
             input_path=input_path,
             result_path=result_path,
+            model_endpoint_creation_strategy=model_endpoint_creation_strategy,
             class_args=class_args,
         )

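A hedged sketch of passing the new `model_endpoint_creation_strategy` keyword through a flow graph (the step name and class are placeholders; `graph.add_step` and the `ARCHIVE` member are assumptions consistent with the signatures and docstrings above):

    import mlrun
    from mlrun.common.schemas import ModelEndpointCreationStrategy

    serving_fn = mlrun.new_function("flow-demo", kind="serving", image="mlrun/mlrun")
    graph = serving_fn.set_topology("flow", engine="async")

    # "archive" keeps existing endpoints and marks the new one as `latest`
    graph.add_step(
        class_name="MyStepClass",
        name="my-step",
        model_endpoint_creation_strategy=ModelEndpointCreationStrategy.ARCHIVE,  # assumed member
    )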