mlrun 1.8.0rc26__py3-none-any.whl → 1.8.0rc28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +3 -2
- mlrun/artifacts/document.py +9 -6
- mlrun/artifacts/model.py +19 -4
- mlrun/common/model_monitoring/helpers.py +2 -2
- mlrun/common/schemas/model_monitoring/constants.py +0 -1
- mlrun/common/schemas/serving.py +22 -0
- mlrun/config.py +22 -9
- mlrun/datastore/base.py +0 -7
- mlrun/datastore/s3.py +9 -2
- mlrun/db/base.py +2 -1
- mlrun/db/httpdb.py +17 -10
- mlrun/db/nopdb.py +2 -1
- mlrun/execution.py +15 -4
- mlrun/lists.py +4 -1
- mlrun/model.py +2 -0
- mlrun/model_monitoring/applications/_application_steps.py +1 -0
- mlrun/model_monitoring/applications/base.py +132 -21
- mlrun/model_monitoring/applications/context.py +2 -3
- mlrun/model_monitoring/controller.py +117 -57
- mlrun/model_monitoring/db/_schedules.py +8 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +12 -5
- mlrun/model_monitoring/stream_processing.py +3 -2
- mlrun/projects/project.py +44 -7
- mlrun/runtimes/base.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/nuclio/function.py +37 -0
- mlrun/runtimes/nuclio/serving.py +3 -0
- mlrun/runtimes/pod.py +1 -3
- mlrun/serving/routers.py +62 -17
- mlrun/serving/server.py +11 -0
- mlrun/serving/states.py +0 -4
- mlrun/serving/v2_serving.py +45 -10
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc26.dist-info → mlrun-1.8.0rc28.dist-info}/METADATA +4 -2
- {mlrun-1.8.0rc26.dist-info → mlrun-1.8.0rc28.dist-info}/RECORD +39 -38
- {mlrun-1.8.0rc26.dist-info → mlrun-1.8.0rc28.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc26.dist-info → mlrun-1.8.0rc28.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc26.dist-info → mlrun-1.8.0rc28.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc26.dist-info → mlrun-1.8.0rc28.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py
CHANGED
|
@@ -31,6 +31,7 @@ from os import environ, makedirs, path
|
|
|
31
31
|
from typing import Callable, Optional, Union, cast
|
|
32
32
|
from urllib.parse import urlparse
|
|
33
33
|
|
|
34
|
+
import deprecated
|
|
34
35
|
import dotenv
|
|
35
36
|
import git
|
|
36
37
|
import git.exc
|
|
@@ -1948,7 +1949,8 @@ class MlrunProject(ModelObj):
|
|
|
1948
1949
|
kwargs={"extract_images": True}
|
|
1949
1950
|
)
|
|
1950
1951
|
:param upload: Whether to upload the artifact
|
|
1951
|
-
:param labels:
|
|
1952
|
+
:param labels: Key-value labels. A 'source' label is automatically added using either
|
|
1953
|
+
local_path or target_path to facilitate easier document searching.
|
|
1952
1954
|
:param target_path: Target file path
|
|
1953
1955
|
:param kwargs: Additional keyword arguments
|
|
1954
1956
|
:return: DocumentArtifact object
|
|
@@ -1978,13 +1980,24 @@ class MlrunProject(ModelObj):
|
|
|
1978
1980
|
"The document loader is configured to not support downloads but the upload flag is set to True."
|
|
1979
1981
|
"Either set loader.download_object=True or set upload=False"
|
|
1980
1982
|
)
|
|
1983
|
+
original_source = local_path or target_path
|
|
1981
1984
|
doc_artifact = DocumentArtifact(
|
|
1982
1985
|
key=key,
|
|
1983
|
-
original_source=
|
|
1986
|
+
original_source=original_source,
|
|
1984
1987
|
document_loader_spec=document_loader_spec,
|
|
1985
1988
|
collections=kwargs.pop("collections", None),
|
|
1986
1989
|
**kwargs,
|
|
1987
1990
|
)
|
|
1991
|
+
|
|
1992
|
+
# limit label to a max of 255 characters (for db reasons)
|
|
1993
|
+
max_length = 255
|
|
1994
|
+
labels = labels or {}
|
|
1995
|
+
labels["source"] = (
|
|
1996
|
+
original_source[: max_length - 3] + "..."
|
|
1997
|
+
if len(original_source) > max_length
|
|
1998
|
+
else original_source
|
|
1999
|
+
)
|
|
2000
|
+
|
|
1988
2001
|
return self.log_artifact(
|
|
1989
2002
|
item=doc_artifact,
|
|
1990
2003
|
tag=tag,
|
|
@@ -2406,7 +2419,6 @@ class MlrunProject(ModelObj):
|
|
|
2406
2419
|
*,
|
|
2407
2420
|
deploy_histogram_data_drift_app: bool = True,
|
|
2408
2421
|
wait_for_deployment: bool = False,
|
|
2409
|
-
rebuild_images: bool = False,
|
|
2410
2422
|
fetch_credentials_from_sys_config: bool = False,
|
|
2411
2423
|
) -> None:
|
|
2412
2424
|
"""
|
|
@@ -2428,7 +2440,6 @@ class MlrunProject(ModelObj):
|
|
|
2428
2440
|
:param wait_for_deployment: If true, return only after the deployment is done on the backend.
|
|
2429
2441
|
Otherwise, deploy the model monitoring infrastructure on the
|
|
2430
2442
|
background, including the histogram data drift app if selected.
|
|
2431
|
-
:param rebuild_images: If true, force rebuild of model monitoring infrastructure images.
|
|
2432
2443
|
:param fetch_credentials_from_sys_config: If true, fetch the credentials from the system configuration.
|
|
2433
2444
|
"""
|
|
2434
2445
|
if default_controller_image != "mlrun/mlrun":
|
|
@@ -2451,7 +2462,6 @@ class MlrunProject(ModelObj):
|
|
|
2451
2462
|
image=image,
|
|
2452
2463
|
base_period=base_period,
|
|
2453
2464
|
deploy_histogram_data_drift_app=deploy_histogram_data_drift_app,
|
|
2454
|
-
rebuild_images=rebuild_images,
|
|
2455
2465
|
fetch_credentials_from_sys_config=fetch_credentials_from_sys_config,
|
|
2456
2466
|
)
|
|
2457
2467
|
|
|
@@ -2839,6 +2849,13 @@ class MlrunProject(ModelObj):
|
|
|
2839
2849
|
|
|
2840
2850
|
self.spec.set_function(name, function_object, func)
|
|
2841
2851
|
|
|
2852
|
+
# TODO: Remove this in 1.10.0
|
|
2853
|
+
@deprecated.deprecated(
|
|
2854
|
+
version="1.8.0",
|
|
2855
|
+
reason="'remove_function' is deprecated and will be removed in 1.10.0. "
|
|
2856
|
+
"Please use `delete_function` instead.",
|
|
2857
|
+
category=FutureWarning,
|
|
2858
|
+
)
|
|
2842
2859
|
def remove_function(self, name):
|
|
2843
2860
|
"""remove the specified function from the project
|
|
2844
2861
|
|
|
@@ -2846,6 +2863,18 @@ class MlrunProject(ModelObj):
|
|
|
2846
2863
|
"""
|
|
2847
2864
|
self.spec.remove_function(name)
|
|
2848
2865
|
|
|
2866
|
+
def delete_function(self, name, delete_from_db=False):
|
|
2867
|
+
"""deletes the specified function from the project
|
|
2868
|
+
|
|
2869
|
+
:param name: name of the function (under the project)
|
|
2870
|
+
:param delete_from_db: default is False. If False, the function is removed
|
|
2871
|
+
only from the project's cache and spec.
|
|
2872
|
+
If True, the function is also removed from the database.
|
|
2873
|
+
"""
|
|
2874
|
+
if delete_from_db:
|
|
2875
|
+
mlrun.db.get_run_db().delete_function(name=name, project=self.metadata.name)
|
|
2876
|
+
self.spec.remove_function(name)
|
|
2877
|
+
|
|
2849
2878
|
def remove_model_monitoring_function(self, name: Union[str, list[str]]):
|
|
2850
2879
|
"""delete the specified model-monitoring-app function/s
|
|
2851
2880
|
|
|
@@ -3762,8 +3791,8 @@ class MlrunProject(ModelObj):
|
|
|
3762
3791
|
"Please keep in mind that if you already had model monitoring functions "
|
|
3763
3792
|
"/ model monitoring infra / tracked model server "
|
|
3764
3793
|
"deployed on your project, you will need to redeploy them. "
|
|
3765
|
-
"For redeploying the model monitoring infra,
|
|
3766
|
-
"and
|
|
3794
|
+
"For redeploying the model monitoring infra, first disable it using "
|
|
3795
|
+
"`project.disable_model_monitoring()` and then enable it using `project.enable_model_monitoring()`."
|
|
3767
3796
|
)
|
|
3768
3797
|
|
|
3769
3798
|
def list_model_endpoints(
|
|
@@ -3779,6 +3808,7 @@ class MlrunProject(ModelObj):
|
|
|
3779
3808
|
top_level: bool = False,
|
|
3780
3809
|
uids: Optional[list[str]] = None,
|
|
3781
3810
|
latest_only: bool = False,
|
|
3811
|
+
tsdb_metrics: bool = True,
|
|
3782
3812
|
) -> mlrun.common.schemas.ModelEndpointList:
|
|
3783
3813
|
"""
|
|
3784
3814
|
Returns a list of `ModelEndpoint` objects. Each `ModelEndpoint` object represents the current state of a
|
|
@@ -3829,6 +3859,7 @@ class MlrunProject(ModelObj):
|
|
|
3829
3859
|
top_level=top_level,
|
|
3830
3860
|
uids=uids,
|
|
3831
3861
|
latest_only=latest_only,
|
|
3862
|
+
tsdb_metrics=tsdb_metrics,
|
|
3832
3863
|
)
|
|
3833
3864
|
|
|
3834
3865
|
def run_function(
|
|
@@ -4661,6 +4692,8 @@ class MlrunProject(ModelObj):
|
|
|
4661
4692
|
start_time_to: Optional[datetime.datetime] = None,
|
|
4662
4693
|
last_update_time_from: Optional[datetime.datetime] = None,
|
|
4663
4694
|
last_update_time_to: Optional[datetime.datetime] = None,
|
|
4695
|
+
end_time_from: Optional[datetime.datetime] = None,
|
|
4696
|
+
end_time_to: Optional[datetime.datetime] = None,
|
|
4664
4697
|
**kwargs,
|
|
4665
4698
|
) -> mlrun.lists.RunList:
|
|
4666
4699
|
"""Retrieve a list of runs.
|
|
@@ -4704,6 +4737,8 @@ class MlrunProject(ModelObj):
|
|
|
4704
4737
|
:param last_update_time_from: Filter by run last update time in ``(last_update_time_from,
|
|
4705
4738
|
last_update_time_to)``.
|
|
4706
4739
|
:param last_update_time_to: Filter by run last update time in ``(last_update_time_from, last_update_time_to)``.
|
|
4740
|
+
:param end_time_from: Filter by run end time in ``[end_time_from, end_time_to]``.
|
|
4741
|
+
:param end_time_to: Filter by run end time in ``[end_time_from, end_time_to]``.
|
|
4707
4742
|
"""
|
|
4708
4743
|
if state:
|
|
4709
4744
|
# TODO: Remove this in 1.9.0
|
|
@@ -4730,6 +4765,8 @@ class MlrunProject(ModelObj):
|
|
|
4730
4765
|
start_time_to=start_time_to,
|
|
4731
4766
|
last_update_time_from=last_update_time_from,
|
|
4732
4767
|
last_update_time_to=last_update_time_to,
|
|
4768
|
+
end_time_from=end_time_from,
|
|
4769
|
+
end_time_to=end_time_to,
|
|
4733
4770
|
**kwargs,
|
|
4734
4771
|
)
|
|
4735
4772
|
|
mlrun/runtimes/base.py
CHANGED
mlrun/runtimes/generators.py
CHANGED
mlrun/runtimes/nuclio/function.py
CHANGED
|
@@ -599,6 +599,16 @@ class RemoteRuntime(KubeResource):
|
|
|
599
599
|
# when a function is deployed, we wait for it to be ready by default
|
|
600
600
|
# this also means that the function object will be updated with the function status
|
|
601
601
|
self._wait_for_function_deployment(db, verbose=verbose)
|
|
602
|
+
# check if there are any background tasks related to creating model endpoints
|
|
603
|
+
background_tasks = mlrun.common.schemas.BackgroundTaskList(
|
|
604
|
+
**data.pop("background_tasks", {"background_tasks": []})
|
|
605
|
+
).background_tasks
|
|
606
|
+
if background_tasks:
|
|
607
|
+
self._check_model_endpoint_task_state(
|
|
608
|
+
db=db,
|
|
609
|
+
background_task=background_tasks[0],
|
|
610
|
+
wait_for_completion=False,
|
|
611
|
+
)
|
|
602
612
|
|
|
603
613
|
return self._enrich_command_from_status()
|
|
604
614
|
|
|
@@ -1285,6 +1295,33 @@ class RemoteRuntime(KubeResource):
|
|
|
1285
1295
|
return mlrun.model.Credentials.generate_access_key
|
|
1286
1296
|
return None
|
|
1287
1297
|
|
|
1298
|
+
def _check_model_endpoint_task_state(
|
|
1299
|
+
self,
|
|
1300
|
+
db: mlrun.db.RunDBInterface,
|
|
1301
|
+
background_task: mlrun.common.schemas.BackgroundTask,
|
|
1302
|
+
wait_for_completion: bool,
|
|
1303
|
+
):
|
|
1304
|
+
if wait_for_completion:
|
|
1305
|
+
background_task = db._wait_for_background_task_to_reach_terminal_state(
|
|
1306
|
+
name=background_task.metadata.name, project=self.metadata.project
|
|
1307
|
+
)
|
|
1308
|
+
else:
|
|
1309
|
+
background_task = db.get_project_background_task(
|
|
1310
|
+
project=self.metadata.project, name=background_task.metadata.name
|
|
1311
|
+
)
|
|
1312
|
+
if (
|
|
1313
|
+
background_task.status.state
|
|
1314
|
+
in mlrun.common.schemas.BackgroundTaskState.terminal_states()
|
|
1315
|
+
):
|
|
1316
|
+
logger.info(
|
|
1317
|
+
f"Model endpoint creation task completed with state {background_task.status.state}"
|
|
1318
|
+
)
|
|
1319
|
+
else:
|
|
1320
|
+
logger.warning(
|
|
1321
|
+
f"Model endpoint creation task is still running with state {background_task.status.state}"
|
|
1322
|
+
f"You can use the serving function, but it won't be monitored for the next few minutes"
|
|
1323
|
+
)
|
|
1324
|
+
|
|
1288
1325
|
|
|
1289
1326
|
def parse_logs(logs):
|
|
1290
1327
|
logs = json.loads(logs)
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -152,6 +152,7 @@ class ServingSpec(NuclioSpec):
|
|
|
152
152
|
clone_target_dir=None,
|
|
153
153
|
state_thresholds=None,
|
|
154
154
|
disable_default_http_trigger=None,
|
|
155
|
+
model_endpoint_creation_task_name=None,
|
|
155
156
|
):
|
|
156
157
|
super().__init__(
|
|
157
158
|
command=command,
|
|
@@ -209,6 +210,7 @@ class ServingSpec(NuclioSpec):
|
|
|
209
210
|
self.tracking_policy = tracking_policy
|
|
210
211
|
self.secret_sources = secret_sources or []
|
|
211
212
|
self.default_content_type = default_content_type
|
|
213
|
+
self.model_endpoint_creation_task_name = model_endpoint_creation_task_name
|
|
212
214
|
|
|
213
215
|
@property
|
|
214
216
|
def graph(self) -> Union[RouterStep, RootFlowStep]:
|
|
@@ -696,6 +698,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
696
698
|
"track_models": self.spec.track_models,
|
|
697
699
|
"tracking_policy": None,
|
|
698
700
|
"default_content_type": self.spec.default_content_type,
|
|
701
|
+
"model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
|
|
699
702
|
}
|
|
700
703
|
|
|
701
704
|
if self.spec.secret_sources:
|
mlrun/runtimes/pod.py
CHANGED
|
@@ -214,9 +214,7 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
214
214
|
# default service account is set in mlrun.utils.process_function_service_account
|
|
215
215
|
# due to project specific defaults
|
|
216
216
|
self.service_account = service_account
|
|
217
|
-
self.image_pull_secret = (
|
|
218
|
-
image_pull_secret or mlrun.mlconf.function.spec.image_pull_secret.default
|
|
219
|
-
)
|
|
217
|
+
self.image_pull_secret = image_pull_secret
|
|
220
218
|
self.node_name = node_name
|
|
221
219
|
self.node_selector = node_selector or {}
|
|
222
220
|
self._affinity = affinity
|
mlrun/serving/routers.py
CHANGED
|
@@ -389,7 +389,7 @@ class ParallelRun(BaseModelRouter):
|
|
|
389
389
|
self._pool = executor_class(
|
|
390
390
|
max_workers=len(self.routes),
|
|
391
391
|
initializer=ParallelRun.init_pool,
|
|
392
|
-
initargs=(server, routes),
|
|
392
|
+
initargs=(server, routes, self.context.is_mock),
|
|
393
393
|
)
|
|
394
394
|
elif self.executor_type == ParallelRunnerModes.thread:
|
|
395
395
|
executor_class = concurrent.futures.ThreadPoolExecutor
|
|
@@ -452,9 +452,9 @@ class ParallelRun(BaseModelRouter):
|
|
|
452
452
|
return results
|
|
453
453
|
|
|
454
454
|
@staticmethod
|
|
455
|
-
def init_pool(server_spec, routes):
|
|
455
|
+
def init_pool(server_spec, routes, is_mock):
|
|
456
456
|
server = mlrun.serving.GraphServer.from_dict(server_spec)
|
|
457
|
-
server.init_states(None, None)
|
|
457
|
+
server.init_states(None, None, is_mock=is_mock)
|
|
458
458
|
global local_routes
|
|
459
459
|
for route in routes.values():
|
|
460
460
|
route.context = server.context
|
|
@@ -596,11 +596,6 @@ class VotingEnsemble(ParallelRun):
|
|
|
596
596
|
self.vote_type = vote_type
|
|
597
597
|
self.vote_flag = True if self.vote_type is not None else False
|
|
598
598
|
self.weights = weights
|
|
599
|
-
self._model_logger = (
|
|
600
|
-
_ModelLogPusher(self, context)
|
|
601
|
-
if context and context.stream.enabled
|
|
602
|
-
else None
|
|
603
|
-
)
|
|
604
599
|
self.version = kwargs.get("version", "v1")
|
|
605
600
|
self.log_router = True
|
|
606
601
|
self.prediction_col_name = prediction_col_name or "prediction"
|
|
@@ -608,8 +603,12 @@ class VotingEnsemble(ParallelRun):
|
|
|
608
603
|
self.model_endpoint_uid = None
|
|
609
604
|
self.model_endpoint = None
|
|
610
605
|
self.shard_by_endpoint = shard_by_endpoint
|
|
606
|
+
self.initialized = False
|
|
611
607
|
|
|
612
608
|
def post_init(self, mode="sync", **kwargs):
|
|
609
|
+
self._update_weights(self.weights)
|
|
610
|
+
|
|
611
|
+
def _lazy_init(self, event_id):
|
|
613
612
|
server: mlrun.serving.GraphServer = getattr(
|
|
614
613
|
self.context, "_server", None
|
|
615
614
|
) or getattr(self.context, "server", None)
|
|
@@ -617,14 +616,59 @@ class VotingEnsemble(ParallelRun):
|
|
|
617
616
|
logger.warn("GraphServer not initialized for VotingEnsemble instance")
|
|
618
617
|
return
|
|
619
618
|
if not self.context.is_mock or self.context.monitoring_mock:
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
619
|
+
if server.model_endpoint_creation_task_name:
|
|
620
|
+
background_task = mlrun.get_run_db().get_project_background_task(
|
|
621
|
+
server.project, server.model_endpoint_creation_task_name
|
|
622
|
+
)
|
|
623
|
+
logger.info(
|
|
624
|
+
"Checking model endpoint creation task status",
|
|
625
|
+
task_name=server.model_endpoint_creation_task_name,
|
|
626
|
+
)
|
|
627
|
+
if (
|
|
628
|
+
background_task.status.state
|
|
629
|
+
in mlrun.common.schemas.BackgroundTaskState.terminal_states()
|
|
630
|
+
):
|
|
631
|
+
logger.info(
|
|
632
|
+
f"Model endpoint creation task completed with state {background_task.status.state}"
|
|
633
|
+
)
|
|
634
|
+
else: # in progress
|
|
635
|
+
logger.debug(
|
|
636
|
+
f"Model endpoint creation task is still in progress with the current state: "
|
|
637
|
+
f"{background_task.status.state}. This event will not be monitored.",
|
|
638
|
+
name=self.name,
|
|
639
|
+
event_id=event_id,
|
|
640
|
+
)
|
|
641
|
+
self.initialized = False
|
|
642
|
+
return
|
|
643
|
+
else:
|
|
644
|
+
logger.info(
|
|
645
|
+
"Model endpoint creation task name not provided",
|
|
646
|
+
)
|
|
647
|
+
try:
|
|
648
|
+
self.model_endpoint_uid = (
|
|
649
|
+
mlrun.get_run_db()
|
|
650
|
+
.get_model_endpoint(
|
|
651
|
+
project=server.project,
|
|
652
|
+
name=self.name,
|
|
653
|
+
function_name=server.function_name,
|
|
654
|
+
function_tag=server.function_tag or "latest",
|
|
655
|
+
tsdb_metrics=False,
|
|
656
|
+
)
|
|
657
|
+
.metadata.uid
|
|
658
|
+
)
|
|
659
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
660
|
+
logger.info(
|
|
661
|
+
"Model endpoint not found for this step; monitoring for this model will not be performed",
|
|
662
|
+
function_name=server.function_name,
|
|
663
|
+
name=self.name,
|
|
664
|
+
)
|
|
665
|
+
self.model_endpoint_uid = None
|
|
666
|
+
self._model_logger = (
|
|
667
|
+
_ModelLogPusher(self, self.context)
|
|
668
|
+
if self.context and self.context.stream.enabled and self.model_endpoint_uid
|
|
669
|
+
else None
|
|
670
|
+
)
|
|
671
|
+
self.initialized = True
|
|
628
672
|
|
|
629
673
|
def _resolve_route(self, body, urlpath):
|
|
630
674
|
"""Resolves the appropriate model to send the event to.
|
|
@@ -829,8 +873,9 @@ class VotingEnsemble(ParallelRun):
|
|
|
829
873
|
Response
|
|
830
874
|
Event response after running the requested logic
|
|
831
875
|
"""
|
|
876
|
+
if not self.initialized:
|
|
877
|
+
self._lazy_init(event.id)
|
|
832
878
|
start = now_date()
|
|
833
|
-
|
|
834
879
|
# Handle and verify the request
|
|
835
880
|
original_body = event.body
|
|
836
881
|
event.body = _extract_input_data(self._input_path, event.body)
|
mlrun/serving/server.py
CHANGED
|
@@ -112,6 +112,7 @@ class GraphServer(ModelObj):
|
|
|
112
112
|
function_name=None,
|
|
113
113
|
function_tag=None,
|
|
114
114
|
project=None,
|
|
115
|
+
model_endpoint_creation_task_name=None,
|
|
115
116
|
):
|
|
116
117
|
self._graph = None
|
|
117
118
|
self.graph: Union[RouterStep, RootFlowStep] = graph
|
|
@@ -137,6 +138,7 @@ class GraphServer(ModelObj):
|
|
|
137
138
|
self.function_name = function_name
|
|
138
139
|
self.function_tag = function_tag
|
|
139
140
|
self.project = project
|
|
141
|
+
self.model_endpoint_creation_task_name = model_endpoint_creation_task_name
|
|
140
142
|
|
|
141
143
|
def set_current_function(self, function):
|
|
142
144
|
"""set which child function this server is currently running on"""
|
|
@@ -332,6 +334,7 @@ def v2_serving_init(context, namespace=None):
|
|
|
332
334
|
context.logger.info("Initializing server from spec")
|
|
333
335
|
spec = mlrun.utils.get_serving_spec()
|
|
334
336
|
server = GraphServer.from_dict(spec)
|
|
337
|
+
|
|
335
338
|
if config.log_level.lower() == "debug":
|
|
336
339
|
server.verbose = True
|
|
337
340
|
if hasattr(context, "trigger"):
|
|
@@ -544,11 +547,19 @@ class GraphContext:
|
|
|
544
547
|
self.get_store_resource = None
|
|
545
548
|
self.get_table = None
|
|
546
549
|
self.is_mock = False
|
|
550
|
+
self.monitoring_mock = False
|
|
551
|
+
self._project_obj = None
|
|
547
552
|
|
|
548
553
|
@property
|
|
549
554
|
def server(self):
|
|
550
555
|
return self._server
|
|
551
556
|
|
|
557
|
+
@property
|
|
558
|
+
def project_obj(self):
|
|
559
|
+
if not self._project_obj:
|
|
560
|
+
self._project_obj = mlrun.get_run_db().get_project(name=self.project)
|
|
561
|
+
return self._project_obj
|
|
562
|
+
|
|
552
563
|
@property
|
|
553
564
|
def project(self) -> str:
|
|
554
565
|
"""current project name (for the current function)"""
|
mlrun/serving/states.py
CHANGED
|
@@ -595,10 +595,6 @@ class TaskStep(BaseStep):
|
|
|
595
595
|
creation_strategy=self.model_endpoint_creation_strategy,
|
|
596
596
|
endpoint_type=self.endpoint_type,
|
|
597
597
|
)
|
|
598
|
-
if hasattr(self._object, "model_endpoint_uid"):
|
|
599
|
-
self.endpoint_uid = self._object.model_endpoint_uid
|
|
600
|
-
if hasattr(self._object, "name"):
|
|
601
|
-
self.endpoint_name = self._object.name
|
|
602
598
|
|
|
603
599
|
def respond(self):
|
|
604
600
|
"""mark this step as the responder.
|
mlrun/serving/v2_serving.py
CHANGED
|
@@ -115,9 +115,9 @@ class V2ModelServer(StepToDict):
|
|
|
115
115
|
self.model = model
|
|
116
116
|
self.ready = True
|
|
117
117
|
self.model_endpoint_uid = None
|
|
118
|
-
self.model_endpoint = None
|
|
119
118
|
self.shard_by_endpoint = shard_by_endpoint
|
|
120
119
|
self._model_logger = None
|
|
120
|
+
self.initialized = False
|
|
121
121
|
|
|
122
122
|
def _load_and_update_state(self):
|
|
123
123
|
try:
|
|
@@ -139,36 +139,69 @@ class V2ModelServer(StepToDict):
|
|
|
139
139
|
else:
|
|
140
140
|
self._load_and_update_state()
|
|
141
141
|
|
|
142
|
+
def _lazy_init(self, event_id):
|
|
142
143
|
server: mlrun.serving.GraphServer = getattr(
|
|
143
144
|
self.context, "_server", None
|
|
144
145
|
) or getattr(self.context, "server", None)
|
|
145
146
|
if not server:
|
|
146
147
|
logger.warn("GraphServer not initialized for VotingEnsemble instance")
|
|
147
148
|
return
|
|
148
|
-
|
|
149
149
|
if not self.context.is_mock and not self.model_spec:
|
|
150
150
|
self.get_model()
|
|
151
151
|
if not self.context.is_mock or self.context.monitoring_mock:
|
|
152
|
+
if server.model_endpoint_creation_task_name:
|
|
153
|
+
background_task = mlrun.get_run_db().get_project_background_task(
|
|
154
|
+
server.project, server.model_endpoint_creation_task_name
|
|
155
|
+
)
|
|
156
|
+
logger.debug(
|
|
157
|
+
"Checking model endpoint creation task status",
|
|
158
|
+
task_name=server.model_endpoint_creation_task_name,
|
|
159
|
+
)
|
|
160
|
+
if (
|
|
161
|
+
background_task.status.state
|
|
162
|
+
in mlrun.common.schemas.BackgroundTaskState.terminal_states()
|
|
163
|
+
):
|
|
164
|
+
logger.debug(
|
|
165
|
+
f"Model endpoint creation task completed with state {background_task.status.state}"
|
|
166
|
+
)
|
|
167
|
+
else: # in progress
|
|
168
|
+
logger.debug(
|
|
169
|
+
f"Model endpoint creation task is still in progress with the current state: "
|
|
170
|
+
f"{background_task.status.state}. This event will not be monitored.",
|
|
171
|
+
name=self.name,
|
|
172
|
+
event_id=event_id,
|
|
173
|
+
)
|
|
174
|
+
self.initialized = False
|
|
175
|
+
return
|
|
176
|
+
else:
|
|
177
|
+
logger.debug(
|
|
178
|
+
"Model endpoint creation task name not provided",
|
|
179
|
+
)
|
|
152
180
|
try:
|
|
153
|
-
self.
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
181
|
+
self.model_endpoint_uid = (
|
|
182
|
+
mlrun.get_run_db()
|
|
183
|
+
.get_model_endpoint(
|
|
184
|
+
project=server.project,
|
|
185
|
+
name=self.name,
|
|
186
|
+
function_name=server.function_name,
|
|
187
|
+
function_tag=server.function_tag or "latest",
|
|
188
|
+
tsdb_metrics=False,
|
|
189
|
+
)
|
|
190
|
+
.metadata.uid
|
|
158
191
|
)
|
|
159
|
-
self.model_endpoint_uid = self.model_endpoint.metadata.uid
|
|
160
192
|
except mlrun.errors.MLRunNotFoundError:
|
|
161
193
|
logger.info(
|
|
162
|
-
"Model
|
|
194
|
+
"Model endpoint not found for this step; monitoring for this model will not be performed",
|
|
163
195
|
function_name=server.function_name,
|
|
164
196
|
name=self.name,
|
|
165
197
|
)
|
|
166
|
-
self.
|
|
198
|
+
self.model_endpoint_uid = None
|
|
167
199
|
self._model_logger = (
|
|
168
200
|
_ModelLogPusher(self, self.context)
|
|
169
201
|
if self.context and self.context.stream.enabled and self.model_endpoint_uid
|
|
170
202
|
else None
|
|
171
203
|
)
|
|
204
|
+
self.initialized = True
|
|
172
205
|
|
|
173
206
|
def get_param(self, key: str, default=None):
|
|
174
207
|
"""get param by key (specified in the model or the function)"""
|
|
@@ -246,6 +279,8 @@ class V2ModelServer(StepToDict):
|
|
|
246
279
|
|
|
247
280
|
def do_event(self, event, *args, **kwargs):
|
|
248
281
|
"""main model event handler method"""
|
|
282
|
+
if not self.initialized:
|
|
283
|
+
self._lazy_init(event.id)
|
|
249
284
|
start = now_date()
|
|
250
285
|
original_body = event.body
|
|
251
286
|
event_body = _extract_input_data(self._input_path, event.body)
|
mlrun/utils/version/version.json
CHANGED
{mlrun-1.8.0rc26.dist-info → mlrun-1.8.0rc28.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: mlrun
|
|
3
|
-
Version: 1.8.0rc26
|
|
3
|
+
Version: 1.8.0rc28
|
|
4
4
|
Summary: Tracking and config of machine learning runs
|
|
5
5
|
Home-page: https://github.com/mlrun/mlrun
|
|
6
6
|
Author: Yaron Haviv
|
|
@@ -44,7 +44,7 @@ Requires-Dist: semver~=3.0
|
|
|
44
44
|
Requires-Dist: dependency-injector~=4.41
|
|
45
45
|
Requires-Dist: fsspec<2024.7,>=2023.9.2
|
|
46
46
|
Requires-Dist: v3iofs~=0.1.17
|
|
47
|
-
Requires-Dist: storey~=1.8.
|
|
47
|
+
Requires-Dist: storey~=1.8.9
|
|
48
48
|
Requires-Dist: inflection~=0.5.0
|
|
49
49
|
Requires-Dist: python-dotenv~=1.0
|
|
50
50
|
Requires-Dist: setuptools>=75.2
|
|
@@ -120,6 +120,7 @@ Requires-Dist: memray~=1.12; sys_platform != "win32" and extra == "api"
|
|
|
120
120
|
Requires-Dist: aiosmtplib~=3.0; extra == "api"
|
|
121
121
|
Requires-Dist: pydantic<2,>=1; extra == "api"
|
|
122
122
|
Requires-Dist: mlrun-pipelines-kfp-v1-8[kfp]~=0.3.5; python_version < "3.11" and extra == "api"
|
|
123
|
+
Requires-Dist: grpcio~=1.70.0; extra == "api"
|
|
123
124
|
Provides-Extra: all
|
|
124
125
|
Requires-Dist: adlfs==2023.9.0; extra == "all"
|
|
125
126
|
Requires-Dist: aiobotocore<2.16,>=2.5.0; extra == "all"
|
|
@@ -208,6 +209,7 @@ Requires-Dist: google-cloud-bigquery[bqstorage,pandas]==3.14.1; extra == "comple
|
|
|
208
209
|
Requires-Dist: google-cloud-storage==2.14.0; extra == "complete-api"
|
|
209
210
|
Requires-Dist: google-cloud==0.34; extra == "complete-api"
|
|
210
211
|
Requires-Dist: graphviz~=0.20.0; extra == "complete-api"
|
|
212
|
+
Requires-Dist: grpcio~=1.70.0; extra == "complete-api"
|
|
211
213
|
Requires-Dist: humanfriendly~=10.0; extra == "complete-api"
|
|
212
214
|
Requires-Dist: igz-mgmt~=0.4.1; extra == "complete-api"
|
|
213
215
|
Requires-Dist: kafka-python~=2.0; extra == "complete-api"
|