mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +134 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +133 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc8.dist-info/METADATA +0 -272
- mlrun-1.6.4rc8.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/serving/routers.py
CHANGED
|
@@ -20,7 +20,7 @@ import traceback
|
|
|
20
20
|
import typing
|
|
21
21
|
from enum import Enum
|
|
22
22
|
from io import BytesIO
|
|
23
|
-
from typing import
|
|
23
|
+
from typing import Union
|
|
24
24
|
|
|
25
25
|
import numpy
|
|
26
26
|
import numpy as np
|
|
@@ -28,10 +28,10 @@ import numpy as np
|
|
|
28
28
|
import mlrun
|
|
29
29
|
import mlrun.common.model_monitoring
|
|
30
30
|
import mlrun.common.schemas.model_monitoring
|
|
31
|
+
from mlrun.errors import err_to_str
|
|
31
32
|
from mlrun.utils import logger, now_date
|
|
32
33
|
|
|
33
34
|
from ..common.helpers import parse_versioned_object_uri
|
|
34
|
-
from ..config import config
|
|
35
35
|
from .server import GraphServer
|
|
36
36
|
from .utils import RouterToDict, _extract_input_data, _update_result_body
|
|
37
37
|
from .v2_serving import _ModelLogPusher
|
|
@@ -271,7 +271,9 @@ class ParallelRun(BaseModelRouter):
|
|
|
271
271
|
fn = mlrun.new_function("parallel", kind="serving")
|
|
272
272
|
graph = fn.set_topology(
|
|
273
273
|
"router",
|
|
274
|
-
mlrun.serving.routers.ParallelRun(
|
|
274
|
+
mlrun.serving.routers.ParallelRun(
|
|
275
|
+
extend_event=True, executor_type=executor
|
|
276
|
+
),
|
|
275
277
|
)
|
|
276
278
|
graph.add_route("child1", class_name="Cls1")
|
|
277
279
|
graph.add_route("child2", class_name="Cls2", my_arg={"c": 7})
|
|
@@ -485,7 +487,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
485
487
|
url_prefix: str = None,
|
|
486
488
|
health_prefix: str = None,
|
|
487
489
|
vote_type: str = None,
|
|
488
|
-
weights:
|
|
490
|
+
weights: dict[str, float] = None,
|
|
489
491
|
executor_type: Union[ParallelRunnerModes, str] = ParallelRunnerModes.thread,
|
|
490
492
|
format_response_with_col_name_flag: bool = False,
|
|
491
493
|
prediction_col_name: str = "prediction",
|
|
@@ -613,7 +615,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
613
615
|
logger.warn("GraphServer not initialized for VotingEnsemble instance")
|
|
614
616
|
return
|
|
615
617
|
|
|
616
|
-
if not self.context.is_mock or self.context.
|
|
618
|
+
if not self.context.is_mock or self.context.monitoring_mock:
|
|
617
619
|
self.model_endpoint_uid = _init_endpoint_record(server, self)
|
|
618
620
|
|
|
619
621
|
self._update_weights(self.weights)
|
|
@@ -703,7 +705,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
703
705
|
)
|
|
704
706
|
return model, None, subpath
|
|
705
707
|
|
|
706
|
-
def _majority_vote(self, all_predictions:
|
|
708
|
+
def _majority_vote(self, all_predictions: list[list[int]], weights: list[float]):
|
|
707
709
|
"""
|
|
708
710
|
Returns most predicted class for each event
|
|
709
711
|
|
|
@@ -727,7 +729,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
727
729
|
weighted_res = one_hot_representation @ weights
|
|
728
730
|
return np.argmax(weighted_res, axis=1).tolist()
|
|
729
731
|
|
|
730
|
-
def _mean_vote(self, all_predictions:
|
|
732
|
+
def _mean_vote(self, all_predictions: list[list[float]], weights: list[float]):
|
|
731
733
|
"""
|
|
732
734
|
Returns weighted mean of the predictions
|
|
733
735
|
|
|
@@ -741,7 +743,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
741
743
|
def _is_int(self, value):
|
|
742
744
|
return float(value).is_integer()
|
|
743
745
|
|
|
744
|
-
def logic(self, predictions:
|
|
746
|
+
def logic(self, predictions: list[list[Union[int, float]]], weights: list[float]):
|
|
745
747
|
"""
|
|
746
748
|
Returns the final prediction of all the models after applying the desire logic
|
|
747
749
|
|
|
@@ -957,7 +959,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
957
959
|
raise Exception('Expected "inputs" to be a list')
|
|
958
960
|
return request
|
|
959
961
|
|
|
960
|
-
def _normalize_weights(self, weights_dict:
|
|
962
|
+
def _normalize_weights(self, weights_dict: dict[str, float]):
|
|
961
963
|
"""
|
|
962
964
|
Normalized all the weights such that abs(weights_sum - 1.0) <= 0.001
|
|
963
965
|
and adding 0 weight to all the routes that doesn't appear in the dict.
|
|
@@ -1013,7 +1015,7 @@ def _init_endpoint_record(
|
|
|
1013
1015
|
graph_server.function_uri
|
|
1014
1016
|
)
|
|
1015
1017
|
except Exception as e:
|
|
1016
|
-
logger.error("Failed to parse function URI", exc=e)
|
|
1018
|
+
logger.error("Failed to parse function URI", exc=err_to_str(e))
|
|
1017
1019
|
return None
|
|
1018
1020
|
|
|
1019
1021
|
# Generating version model value based on the model name and model version
|
|
@@ -1027,74 +1029,88 @@ def _init_endpoint_record(
|
|
|
1027
1029
|
function_uri=graph_server.function_uri, versioned_model=versioned_model_name
|
|
1028
1030
|
).uid
|
|
1029
1031
|
|
|
1030
|
-
# If model endpoint object was found in DB, skip the creation process.
|
|
1031
1032
|
try:
|
|
1032
|
-
mlrun.get_run_db().get_model_endpoint(
|
|
1033
|
-
|
|
1033
|
+
model_ep = mlrun.get_run_db().get_model_endpoint(
|
|
1034
|
+
project=project, endpoint_id=endpoint_uid
|
|
1035
|
+
)
|
|
1034
1036
|
except mlrun.errors.MLRunNotFoundError:
|
|
1037
|
+
model_ep = None
|
|
1038
|
+
except mlrun.errors.MLRunBadRequestError as err:
|
|
1039
|
+
logger.debug(
|
|
1040
|
+
f"Cant reach to model endpoints store, due to : {err}",
|
|
1041
|
+
)
|
|
1042
|
+
return
|
|
1043
|
+
|
|
1044
|
+
if voting_ensemble.context.server.track_models and not model_ep:
|
|
1035
1045
|
logger.info("Creating a new model endpoint record", endpoint_id=endpoint_uid)
|
|
1046
|
+
# Get the children model endpoints ids
|
|
1047
|
+
children_uids = []
|
|
1048
|
+
for _, c in voting_ensemble.routes.items():
|
|
1049
|
+
if hasattr(c, "endpoint_uid"):
|
|
1050
|
+
children_uids.append(c.endpoint_uid)
|
|
1051
|
+
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
1052
|
+
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
1053
|
+
project=project, uid=endpoint_uid
|
|
1054
|
+
),
|
|
1055
|
+
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
1056
|
+
function_uri=graph_server.function_uri,
|
|
1057
|
+
model=versioned_model_name,
|
|
1058
|
+
model_class=voting_ensemble.__class__.__name__,
|
|
1059
|
+
stream_path=voting_ensemble.context.stream.stream_uri,
|
|
1060
|
+
active=True,
|
|
1061
|
+
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
|
|
1062
|
+
),
|
|
1063
|
+
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
1064
|
+
children=list(voting_ensemble.routes.keys()),
|
|
1065
|
+
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
|
|
1066
|
+
children_uids=children_uids,
|
|
1067
|
+
),
|
|
1068
|
+
)
|
|
1036
1069
|
|
|
1037
|
-
|
|
1038
|
-
# Get the children model endpoints ids
|
|
1039
|
-
children_uids = []
|
|
1040
|
-
for _, c in voting_ensemble.routes.items():
|
|
1041
|
-
if hasattr(c, "endpoint_uid"):
|
|
1042
|
-
children_uids.append(c.endpoint_uid)
|
|
1043
|
-
|
|
1044
|
-
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
1045
|
-
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
1046
|
-
project=project, uid=endpoint_uid
|
|
1047
|
-
),
|
|
1048
|
-
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
1049
|
-
function_uri=graph_server.function_uri,
|
|
1050
|
-
model=versioned_model_name,
|
|
1051
|
-
model_class=voting_ensemble.__class__.__name__,
|
|
1052
|
-
stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
1053
|
-
project=project, kind="stream"
|
|
1054
|
-
),
|
|
1055
|
-
active=True,
|
|
1056
|
-
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1057
|
-
if voting_ensemble.context.server.track_models
|
|
1058
|
-
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
|
|
1059
|
-
),
|
|
1060
|
-
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
1061
|
-
children=list(voting_ensemble.routes.keys()),
|
|
1062
|
-
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
|
|
1063
|
-
children_uids=children_uids,
|
|
1064
|
-
),
|
|
1065
|
-
)
|
|
1070
|
+
db = mlrun.get_run_db()
|
|
1066
1071
|
|
|
1067
|
-
|
|
1072
|
+
db.create_model_endpoint(
|
|
1073
|
+
project=project,
|
|
1074
|
+
endpoint_id=model_endpoint.metadata.uid,
|
|
1075
|
+
model_endpoint=model_endpoint.dict(),
|
|
1076
|
+
)
|
|
1068
1077
|
|
|
1078
|
+
# Update model endpoint children type
|
|
1079
|
+
for model_endpoint in children_uids:
|
|
1080
|
+
current_endpoint = db.get_model_endpoint(
|
|
1081
|
+
project=project, endpoint_id=model_endpoint
|
|
1082
|
+
)
|
|
1083
|
+
current_endpoint.status.endpoint_type = (
|
|
1084
|
+
mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
|
|
1085
|
+
)
|
|
1069
1086
|
db.create_model_endpoint(
|
|
1070
1087
|
project=project,
|
|
1071
|
-
endpoint_id=model_endpoint
|
|
1072
|
-
model_endpoint=
|
|
1073
|
-
)
|
|
1074
|
-
|
|
1075
|
-
# Update model endpoint children type
|
|
1076
|
-
for model_endpoint in children_uids:
|
|
1077
|
-
current_endpoint = db.get_model_endpoint(
|
|
1078
|
-
project=project, endpoint_id=model_endpoint
|
|
1079
|
-
)
|
|
1080
|
-
current_endpoint.status.endpoint_type = (
|
|
1081
|
-
mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
|
|
1082
|
-
)
|
|
1083
|
-
db.create_model_endpoint(
|
|
1084
|
-
project=project,
|
|
1085
|
-
endpoint_id=model_endpoint,
|
|
1086
|
-
model_endpoint=current_endpoint,
|
|
1087
|
-
)
|
|
1088
|
-
|
|
1089
|
-
except Exception as exc:
|
|
1090
|
-
logger.warning(
|
|
1091
|
-
"Failed creating model endpoint record",
|
|
1092
|
-
exc=exc,
|
|
1093
|
-
traceback=traceback.format_exc(),
|
|
1088
|
+
endpoint_id=model_endpoint,
|
|
1089
|
+
model_endpoint=current_endpoint,
|
|
1094
1090
|
)
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1091
|
+
elif (
|
|
1092
|
+
model_ep
|
|
1093
|
+
and (
|
|
1094
|
+
model_ep.spec.monitoring_mode
|
|
1095
|
+
== mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1096
|
+
)
|
|
1097
|
+
!= voting_ensemble.context.server.track_models
|
|
1098
|
+
):
|
|
1099
|
+
monitoring_mode = (
|
|
1100
|
+
mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1101
|
+
if voting_ensemble.context.server.track_models
|
|
1102
|
+
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
|
|
1103
|
+
)
|
|
1104
|
+
db = mlrun.get_run_db()
|
|
1105
|
+
db.patch_model_endpoint(
|
|
1106
|
+
project=project,
|
|
1107
|
+
endpoint_id=endpoint_uid,
|
|
1108
|
+
attributes={"monitoring_mode": monitoring_mode},
|
|
1109
|
+
)
|
|
1110
|
+
logger.debug(
|
|
1111
|
+
f"Updating model endpoint monitoring_mode to {monitoring_mode}",
|
|
1112
|
+
endpoint_id=endpoint_uid,
|
|
1113
|
+
)
|
|
1098
1114
|
|
|
1099
1115
|
return endpoint_uid
|
|
1100
1116
|
|
mlrun/serving/server.py
CHANGED
|
@@ -22,9 +22,14 @@ import traceback
|
|
|
22
22
|
import uuid
|
|
23
23
|
from typing import Optional, Union
|
|
24
24
|
|
|
25
|
+
from nuclio import Context as NuclioContext
|
|
26
|
+
from nuclio.request import Logger as NuclioLogger
|
|
27
|
+
|
|
25
28
|
import mlrun
|
|
29
|
+
import mlrun.common.constants
|
|
26
30
|
import mlrun.common.helpers
|
|
27
31
|
import mlrun.model_monitoring
|
|
32
|
+
import mlrun.utils
|
|
28
33
|
from mlrun.config import config
|
|
29
34
|
from mlrun.errors import err_to_str
|
|
30
35
|
from mlrun.secrets import SecretsStore
|
|
@@ -37,10 +42,7 @@ from ..errors import MLRunInvalidArgumentError
|
|
|
37
42
|
from ..model import ModelObj
|
|
38
43
|
from ..utils import get_caller_globals
|
|
39
44
|
from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
|
|
40
|
-
from .utils import
|
|
41
|
-
event_id_key,
|
|
42
|
-
event_path_key,
|
|
43
|
-
)
|
|
45
|
+
from .utils import event_id_key, event_path_key
|
|
44
46
|
|
|
45
47
|
|
|
46
48
|
class _StreamContext:
|
|
@@ -52,7 +54,7 @@ class _StreamContext:
|
|
|
52
54
|
Initialize _StreamContext object.
|
|
53
55
|
:param enabled: A boolean indication for applying the stream context
|
|
54
56
|
:param parameters: Dictionary of optional parameters, such as `log_stream` and `stream_args`. Note that these
|
|
55
|
-
parameters might be relevant to the output source such as `
|
|
57
|
+
parameters might be relevant to the output source such as `kafka_brokers` if
|
|
56
58
|
the output source is from type Kafka.
|
|
57
59
|
:param function_uri: Full value of the function uri, usually it's <project-name>/<function-name>
|
|
58
60
|
"""
|
|
@@ -70,15 +72,15 @@ class _StreamContext:
|
|
|
70
72
|
function_uri, config.default_project
|
|
71
73
|
)
|
|
72
74
|
|
|
73
|
-
stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
|
|
75
|
+
self.stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
|
|
74
76
|
|
|
75
77
|
if log_stream:
|
|
76
78
|
# Update the stream path to the log stream value
|
|
77
|
-
stream_uri = log_stream.format(project=project)
|
|
79
|
+
self.stream_uri = log_stream.format(project=project)
|
|
78
80
|
|
|
79
81
|
stream_args = parameters.get("stream_args", {})
|
|
80
82
|
|
|
81
|
-
self.output_stream = get_stream_pusher(stream_uri, **stream_args)
|
|
83
|
+
self.output_stream = get_stream_pusher(self.stream_uri, **stream_args)
|
|
82
84
|
|
|
83
85
|
|
|
84
86
|
class GraphServer(ModelObj):
|
|
@@ -152,6 +154,7 @@ class GraphServer(ModelObj):
|
|
|
152
154
|
resource_cache: ResourceCache = None,
|
|
153
155
|
logger=None,
|
|
154
156
|
is_mock=False,
|
|
157
|
+
monitoring_mock=False,
|
|
155
158
|
):
|
|
156
159
|
"""for internal use, initialize all steps (recursively)"""
|
|
157
160
|
|
|
@@ -164,6 +167,7 @@ class GraphServer(ModelObj):
|
|
|
164
167
|
|
|
165
168
|
context = GraphContext(server=self, nuclio_context=context, logger=logger)
|
|
166
169
|
context.is_mock = is_mock
|
|
170
|
+
context.monitoring_mock = monitoring_mock
|
|
167
171
|
context.root = self.graph
|
|
168
172
|
|
|
169
173
|
context.stream = _StreamContext(
|
|
@@ -188,11 +192,6 @@ class GraphServer(ModelObj):
|
|
|
188
192
|
|
|
189
193
|
def init_object(self, namespace):
|
|
190
194
|
self.graph.init_object(self.context, namespace, self.load_mode, reset=True)
|
|
191
|
-
return (
|
|
192
|
-
v2_serving_async_handler
|
|
193
|
-
if config.datastore.async_source_mode == "enabled"
|
|
194
|
-
else v2_serving_handler
|
|
195
|
-
)
|
|
196
195
|
|
|
197
196
|
def test(
|
|
198
197
|
self,
|
|
@@ -310,17 +309,14 @@ class GraphServer(ModelObj):
|
|
|
310
309
|
|
|
311
310
|
def wait_for_completion(self):
|
|
312
311
|
"""wait for async operation to complete"""
|
|
313
|
-
self.graph.wait_for_completion()
|
|
312
|
+
return self.graph.wait_for_completion()
|
|
314
313
|
|
|
315
314
|
|
|
316
315
|
def v2_serving_init(context, namespace=None):
|
|
317
316
|
"""hook for nuclio init_context()"""
|
|
318
317
|
|
|
319
|
-
data = os.environ.get("SERVING_SPEC_ENV", "")
|
|
320
|
-
if not data:
|
|
321
|
-
raise MLRunInvalidArgumentError("failed to find spec env var")
|
|
322
|
-
spec = json.loads(data)
|
|
323
318
|
context.logger.info("Initializing server from spec")
|
|
319
|
+
spec = mlrun.utils.get_serving_spec()
|
|
324
320
|
server = GraphServer.from_dict(spec)
|
|
325
321
|
if config.log_level.lower() == "debug":
|
|
326
322
|
server.verbose = True
|
|
@@ -328,42 +324,54 @@ def v2_serving_init(context, namespace=None):
|
|
|
328
324
|
server.http_trigger = getattr(context.trigger, "kind", "http") == "http"
|
|
329
325
|
context.logger.info_with(
|
|
330
326
|
"Setting current function",
|
|
331
|
-
|
|
327
|
+
current_function=os.getenv("SERVING_CURRENT_FUNCTION", ""),
|
|
332
328
|
)
|
|
333
|
-
server.set_current_function(os.
|
|
329
|
+
server.set_current_function(os.getenv("SERVING_CURRENT_FUNCTION", ""))
|
|
334
330
|
context.logger.info_with(
|
|
335
331
|
"Initializing states", namespace=namespace or get_caller_globals()
|
|
336
332
|
)
|
|
337
|
-
|
|
333
|
+
kwargs = {}
|
|
334
|
+
if hasattr(context, "is_mock"):
|
|
335
|
+
kwargs["is_mock"] = context.is_mock
|
|
336
|
+
server.init_states(
|
|
337
|
+
context,
|
|
338
|
+
namespace or get_caller_globals(),
|
|
339
|
+
**kwargs,
|
|
340
|
+
)
|
|
338
341
|
context.logger.info("Initializing graph steps")
|
|
339
|
-
|
|
342
|
+
server.init_object(namespace or get_caller_globals())
|
|
340
343
|
# set the handler hook to point to our handler
|
|
341
|
-
setattr(context, "mlrun_handler",
|
|
344
|
+
setattr(context, "mlrun_handler", v2_serving_handler)
|
|
342
345
|
setattr(context, "_server", server)
|
|
343
346
|
context.logger.info_with("Serving was initialized", verbose=server.verbose)
|
|
344
347
|
if server.verbose:
|
|
345
348
|
context.logger.info(server.to_yaml())
|
|
346
349
|
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
+
_set_callbacks(server, context)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _set_callbacks(server, context):
|
|
354
|
+
if not server.graph.supports_termination() or not hasattr(context, "platform"):
|
|
355
|
+
return
|
|
356
|
+
|
|
357
|
+
if hasattr(context.platform, "set_termination_callback"):
|
|
350
358
|
context.logger.info(
|
|
351
359
|
"Setting termination callback to terminate graph on worker shutdown"
|
|
352
360
|
)
|
|
353
361
|
|
|
354
|
-
def termination_callback():
|
|
362
|
+
async def termination_callback():
|
|
355
363
|
context.logger.info("Termination callback called")
|
|
356
364
|
server.wait_for_completion()
|
|
357
365
|
context.logger.info("Termination of async flow is completed")
|
|
358
366
|
|
|
359
367
|
context.platform.set_termination_callback(termination_callback)
|
|
360
368
|
|
|
361
|
-
if hasattr(context
|
|
369
|
+
if hasattr(context.platform, "set_drain_callback"):
|
|
362
370
|
context.logger.info(
|
|
363
371
|
"Setting drain callback to terminate and restart the graph on a drain event (such as rebalancing)"
|
|
364
372
|
)
|
|
365
373
|
|
|
366
|
-
def drain_callback():
|
|
374
|
+
async def drain_callback():
|
|
367
375
|
context.logger.info("Drain callback called")
|
|
368
376
|
server.wait_for_completion()
|
|
369
377
|
context.logger.info(
|
|
@@ -383,16 +391,26 @@ def v2_serving_handler(context, event, get_body=False):
|
|
|
383
391
|
if event.body == b"":
|
|
384
392
|
event.body = None
|
|
385
393
|
|
|
386
|
-
|
|
387
|
-
|
|
394
|
+
# original path is saved in stream_path so it can be used by explicit ack, but path is reset to / as a
|
|
395
|
+
# workaround for NUC-178
|
|
396
|
+
# nuclio 1.12.12 added the topic attribute, and we must use it as part of the fix for NUC-233
|
|
397
|
+
# TODO: Remove fallback on event.path once support for nuclio<1.12.12 is dropped
|
|
398
|
+
event.stream_path = getattr(event, "topic", event.path)
|
|
399
|
+
if hasattr(event, "trigger") and event.trigger.kind in (
|
|
400
|
+
"kafka",
|
|
401
|
+
"kafka-cluster",
|
|
402
|
+
"v3ioStream",
|
|
403
|
+
"v3io-stream",
|
|
404
|
+
"rabbit-mq",
|
|
405
|
+
"rabbitMq",
|
|
406
|
+
):
|
|
407
|
+
event.path = "/"
|
|
388
408
|
|
|
389
|
-
|
|
390
|
-
"""hook for nuclio handler()"""
|
|
391
|
-
return await context._server.run(event, context, get_body)
|
|
409
|
+
return context._server.run(event, context, get_body)
|
|
392
410
|
|
|
393
411
|
|
|
394
412
|
def create_graph_server(
|
|
395
|
-
parameters=
|
|
413
|
+
parameters=None,
|
|
396
414
|
load_mode=None,
|
|
397
415
|
graph=None,
|
|
398
416
|
verbose=False,
|
|
@@ -408,14 +426,15 @@ def create_graph_server(
|
|
|
408
426
|
server.graph.add_route("my", class_name=MyModelClass, model_path="{path}", z=100)
|
|
409
427
|
print(server.test("/v2/models/my/infer", testdata))
|
|
410
428
|
"""
|
|
429
|
+
parameters = parameters or {}
|
|
411
430
|
server = GraphServer(graph, parameters, load_mode, verbose=verbose, **kwargs)
|
|
412
431
|
server.set_current_function(
|
|
413
|
-
current_function or os.
|
|
432
|
+
current_function or os.getenv("SERVING_CURRENT_FUNCTION", "")
|
|
414
433
|
)
|
|
415
434
|
return server
|
|
416
435
|
|
|
417
436
|
|
|
418
|
-
class MockTrigger
|
|
437
|
+
class MockTrigger:
|
|
419
438
|
"""mock nuclio event trigger"""
|
|
420
439
|
|
|
421
440
|
def __init__(self, kind="", name=""):
|
|
@@ -423,7 +442,7 @@ class MockTrigger(object):
|
|
|
423
442
|
self.name = name
|
|
424
443
|
|
|
425
444
|
|
|
426
|
-
class MockEvent
|
|
445
|
+
class MockEvent:
|
|
427
446
|
"""mock basic nuclio event object"""
|
|
428
447
|
|
|
429
448
|
def __init__(
|
|
@@ -456,7 +475,7 @@ class MockEvent(object):
|
|
|
456
475
|
return f"Event(id={self.id}, body={self.body}, method={self.method}, path={self.path}{error})"
|
|
457
476
|
|
|
458
477
|
|
|
459
|
-
class Response
|
|
478
|
+
class Response:
|
|
460
479
|
def __init__(self, headers=None, body=None, content_type=None, status_code=200):
|
|
461
480
|
self.headers = headers or {}
|
|
462
481
|
self.body = body
|
|
@@ -474,7 +493,13 @@ class Response(object):
|
|
|
474
493
|
class GraphContext:
|
|
475
494
|
"""Graph context object"""
|
|
476
495
|
|
|
477
|
-
def __init__(
|
|
496
|
+
def __init__(
|
|
497
|
+
self,
|
|
498
|
+
level="info", # Unused argument
|
|
499
|
+
logger=None,
|
|
500
|
+
server=None,
|
|
501
|
+
nuclio_context: Optional[NuclioContext] = None,
|
|
502
|
+
) -> None:
|
|
478
503
|
self.state = None
|
|
479
504
|
self.logger = logger
|
|
480
505
|
self.worker_id = 0
|
|
@@ -484,7 +509,7 @@ class GraphContext:
|
|
|
484
509
|
self.root = None
|
|
485
510
|
|
|
486
511
|
if nuclio_context:
|
|
487
|
-
self.logger = nuclio_context.logger
|
|
512
|
+
self.logger: NuclioLogger = nuclio_context.logger
|
|
488
513
|
self.Response = nuclio_context.Response
|
|
489
514
|
if hasattr(nuclio_context, "trigger") and hasattr(
|
|
490
515
|
nuclio_context.trigger, "kind"
|
|
@@ -494,7 +519,7 @@ class GraphContext:
|
|
|
494
519
|
if hasattr(nuclio_context, "platform"):
|
|
495
520
|
self.platform = nuclio_context.platform
|
|
496
521
|
elif not logger:
|
|
497
|
-
self.logger = mlrun.utils.
|
|
522
|
+
self.logger: mlrun.utils.Logger = mlrun.utils.logger
|
|
498
523
|
|
|
499
524
|
self._server = server
|
|
500
525
|
self.current_function = None
|
|
@@ -507,7 +532,7 @@ class GraphContext:
|
|
|
507
532
|
return self._server
|
|
508
533
|
|
|
509
534
|
@property
|
|
510
|
-
def project(self):
|
|
535
|
+
def project(self) -> str:
|
|
511
536
|
"""current project name (for the current function)"""
|
|
512
537
|
project, _, _, _ = mlrun.common.helpers.parse_versioned_object_uri(
|
|
513
538
|
self._server.function_uri
|
|
@@ -563,7 +588,7 @@ class GraphContext:
|
|
|
563
588
|
_,
|
|
564
589
|
_,
|
|
565
590
|
function_status,
|
|
566
|
-
) = mlrun.runtimes.function.get_nuclio_deploy_status(name, project, tag)
|
|
591
|
+
) = mlrun.runtimes.nuclio.function.get_nuclio_deploy_status(name, project, tag)
|
|
567
592
|
|
|
568
593
|
if state in ["error", "unhealthy"]:
|
|
569
594
|
raise ValueError(
|