mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (61)
  1. mlrun/artifacts/model.py +28 -22
  2. mlrun/common/db/sql_session.py +3 -0
  3. mlrun/common/model_monitoring/helpers.py +4 -2
  4. mlrun/common/schemas/__init__.py +2 -0
  5. mlrun/common/schemas/common.py +40 -0
  6. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +21 -5
  8. mlrun/common/schemas/project.py +2 -0
  9. mlrun/config.py +59 -20
  10. mlrun/data_types/data_types.py +4 -0
  11. mlrun/datastore/azure_blob.py +9 -9
  12. mlrun/datastore/base.py +22 -44
  13. mlrun/datastore/google_cloud_storage.py +6 -6
  14. mlrun/datastore/v3io.py +74 -73
  15. mlrun/db/auth_utils.py +152 -0
  16. mlrun/db/base.py +18 -0
  17. mlrun/db/httpdb.py +79 -55
  18. mlrun/execution.py +3 -3
  19. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  20. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  21. mlrun/k8s_utils.py +10 -5
  22. mlrun/kfpops.py +19 -10
  23. mlrun/lists.py +2 -0
  24. mlrun/model.py +31 -2
  25. mlrun/model_monitoring/api.py +8 -8
  26. mlrun/model_monitoring/batch.py +1 -1
  27. mlrun/model_monitoring/controller.py +0 -7
  28. mlrun/model_monitoring/features_drift_table.py +6 -0
  29. mlrun/model_monitoring/helpers.py +4 -1
  30. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
  31. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  32. mlrun/model_monitoring/stream_processing.py +50 -37
  33. mlrun/package/packagers/pandas_packagers.py +3 -3
  34. mlrun/package/utils/_archiver.py +3 -1
  35. mlrun/platforms/iguazio.py +6 -65
  36. mlrun/projects/pipelines.py +51 -17
  37. mlrun/projects/project.py +77 -61
  38. mlrun/render.py +13 -4
  39. mlrun/run.py +2 -0
  40. mlrun/runtimes/base.py +24 -1
  41. mlrun/runtimes/function.py +9 -9
  42. mlrun/runtimes/kubejob.py +5 -3
  43. mlrun/runtimes/local.py +2 -2
  44. mlrun/runtimes/mpijob/abstract.py +6 -6
  45. mlrun/runtimes/pod.py +8 -8
  46. mlrun/runtimes/serving.py +3 -3
  47. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  48. mlrun/serving/remote.py +4 -2
  49. mlrun/utils/async_http.py +28 -8
  50. mlrun/utils/helpers.py +20 -0
  51. mlrun/utils/http.py +3 -3
  52. mlrun/utils/logger.py +11 -6
  53. mlrun/utils/notifications/notification_pusher.py +6 -6
  54. mlrun/utils/version/version.json +2 -2
  55. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/METADATA +18 -18
  56. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/RECORD +60 -59
  57. mlrun/datastore/helpers.py +0 -18
  58. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/LICENSE +0 -0
  59. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/WHEEL +0 -0
  60. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/entry_points.txt +0 -0
  61. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/top_level.txt +0 -0
@@ -540,24 +540,24 @@ class KVModelEndpointStore(ModelEndpointStore):
             and endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS]
             == "null"
         ):
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.METRICS
-            ] = json.dumps(
-                {
-                    mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS] = (
+                json.dumps(
+                    {
+                        mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+                        }
                     }
-                }
+                )
             )
 
         # Validate key `uid` instead of `endpoint_id`
         # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
         if mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID in endpoint:
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.UID
-            ] = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
-            ]
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID] = (
+                endpoint[
+                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
+                ]
+            )
 
     @staticmethod
     def _encode_field(field: typing.Union[str, bytes]) -> bytes:
@@ -31,7 +31,6 @@ from .models import get_model_endpoints_table
 
 
 class SQLModelEndpointStore(ModelEndpointStore):
-
     """
     Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
     SQL toolkit that handles the communication with the database. When using SQL for storing the model endpoints
@@ -24,6 +24,7 @@ import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.config
 import mlrun.datastore.targets
+import mlrun.feature_store as fstore
 import mlrun.feature_store.steps
 import mlrun.model_monitoring.prometheus
 import mlrun.serving.states
@@ -49,7 +50,7 @@ class EventStreamProcessor:
         parquet_batching_timeout_secs: int,
         parquet_target: str,
         sample_window: int = 10,
-        aggregate_windows: typing.Optional[typing.List[str]] = None,
+        aggregate_windows: typing.Optional[list[str]] = None,
         aggregate_period: str = "30s",
         model_monitoring_access_key: str = None,
     ):
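This is the first of several hunks in this release that swap `typing.List`, `typing.Dict`, and `typing.Set` annotations for the built-in generics standardized by PEP 585. A minimal sketch of the two spellings (both run on Python 3.9+; the function below is illustrative only, not mlrun code):

import typing

# Pre-PEP 585 spelling, as removed throughout this release:
def old_style(windows: typing.Optional[typing.List[str]] = None) -> typing.Dict[str, int]:
    return {w: len(w) for w in (windows or [])}

# Built-in generic spelling, as added (identical runtime behavior on Python >= 3.9):
def new_style(windows: typing.Optional[list[str]] = None) -> dict[str, int]:
    return {w: len(w) for w in (windows or [])}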
@@ -349,7 +350,6 @@ class EventStreamProcessor:
             rate="10/m",
             time_col=EventFieldType.TIMESTAMP,
             container=self.tsdb_container,
-            access_key=self.v3io_access_key,
             v3io_frames=self.v3io_framesd,
             infer_columns_from_data=True,
             index_cols=[
@@ -587,6 +587,8 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
         for key in [
             EventFieldType.FEATURES,
             EventFieldType.NAMED_FEATURES,
+            EventFieldType.PREDICTION,
+            EventFieldType.NAMED_PREDICTIONS,
         ]:
             event.pop(key, None)
 
@@ -629,14 +631,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.project: str = project
 
         # First and last requests timestamps (value) of each endpoint (key)
-        self.first_request: typing.Dict[str, str] = dict()
-        self.last_request: typing.Dict[str, str] = dict()
+        self.first_request: dict[str, str] = dict()
+        self.last_request: dict[str, str] = dict()
 
         # Number of errors (value) per endpoint (key)
-        self.error_count: typing.Dict[str, int] = collections.defaultdict(int)
+        self.error_count: dict[str, int] = collections.defaultdict(int)
 
         # Set of endpoints in the current events
-        self.endpoints: typing.Set[str] = set()
+        self.endpoints: set[str] = set()
 
     def do(self, full_event):
         event = full_event.body
@@ -745,18 +747,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # in list of events. This list will be used as the body for the storey event.
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
-            # Validate that inputs are based on numeric values
-            if not self.is_valid(
-                endpoint_id,
-                self.is_list_of_numerics,
-                feature,
-                ["request", "inputs", f"[{i}]"],
-            ):
-                return None
-
             if not isinstance(prediction, list):
                 prediction = [prediction]
 
+            if not isinstance(feature, list):
+                feature = [feature]
+
             events.append(
                 {
                     EventFieldType.FUNCTION_URI: function_uri,
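The removed `is_valid`/`is_list_of_numerics` check rejected non-numeric inputs outright; the new code instead normalizes scalar features into single-element lists, mirroring how predictions were already handled. A self-contained sketch of the normalization with made-up event values:

# Made-up sample payloads: scalars and lists mixed, including non-numeric
# values that the old validation would have rejected.
features = [3.1, [0.2, 0.7], "blue"]
predictions = [0, [1, 0.9], "cat"]

events = []
for feature, prediction in zip(features, predictions):
    # Wrap scalars so downstream steps can always iterate uniformly.
    if not isinstance(prediction, list):
        prediction = [prediction]
    if not isinstance(feature, list):
        feature = [feature]
    events.append({"feature": feature, "prediction": prediction})

assert events[0] == {"feature": [3.1], "prediction": [0]}
assert events[2] == {"feature": ["blue"], "prediction": ["cat"]}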
@@ -803,18 +799,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
             )
 
-    @staticmethod
-    def is_list_of_numerics(
-        field: typing.List[typing.Union[int, float, dict, list]],
-        dict_path: typing.List[str],
-    ):
-        if all(isinstance(x, int) or isinstance(x, float) for x in field):
-            return True
-        logger.error(
-            f"List does not consist of only numeric values: {field} [Event -> {','.join(dict_path)}]"
-        )
-        return False
-
     def resume_state(self, endpoint_id):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
@@ -849,7 +833,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         endpoint_id: str,
         validation_function,
         field: typing.Any,
-        dict_path: typing.List[str],
+        dict_path: list[str],
     ):
         if validation_function(field, dict_path):
             return True
@@ -857,7 +841,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         return False
 
 
-def is_not_none(field: typing.Any, dict_path: typing.List[str]):
+def is_not_none(field: typing.Any, dict_path: list[str]):
     if field is not None:
         return True
     logger.error(
@@ -946,9 +930,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             return self.label_columns[endpoint_id]
         return None
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
 
+        feature_values = event[EventFieldType.FEATURES]
+        label_values = event[EventFieldType.PREDICTION]
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(
@@ -984,6 +970,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 },
             )
 
+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=feature_names,
+                feature_values=feature_values,
+            )
+
             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
                 label_columns = self._infer_label_columns_from_data(event)
@@ -1002,6 +994,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     endpoint_id=endpoint_id,
                     attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
                 )
+                update_monitoring_feature_set(
+                    endpoint_record=endpoint_record,
+                    feature_names=label_columns,
+                    feature_values=label_values,
+                )
 
             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names
@@ -1019,7 +1016,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
-        feature_values = event[EventFieldType.FEATURES]
         self._map_dictionary_values(
             event=event,
             named_iters=feature_names,
@@ -1029,7 +1025,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         # Add label_name:value pairs along with a mapping dictionary of all of these pairs
         label_names = self.label_columns[endpoint_id]
-        label_values = event[EventFieldType.PREDICTION]
         self._map_dictionary_values(
             event=event,
             named_iters=label_names,
@@ -1045,9 +1040,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
     @staticmethod
     def _map_dictionary_values(
-        event: typing.Dict,
-        named_iters: typing.List,
-        values_iters: typing.List,
+        event: dict,
+        named_iters: list,
+        values_iters: list,
         mapping_dictionary: str,
     ):
         """Adding name-value pairs to event dictionary based on two provided lists of names and values. These pairs
@@ -1082,7 +1077,7 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
         self.project = project
         self.model_endpoint_store_target = model_endpoint_store_target
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         update_endpoint_record(
             project=self.project,
             endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
@@ -1117,7 +1112,7 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
         self.table = table
         self.keys = set()
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         key_set = set(event.keys())
         if not key_set.issubset(self.keys):
             self.keys.update(key_set)
@@ -1241,3 +1236,21 @@ def get_endpoint_record(project: str, endpoint_id: str):
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def update_monitoring_feature_set(
+    endpoint_record: dict[str, typing.Any],
+    feature_names: list[str],
+    feature_values: list[typing.Any],
+):
+    monitoring_feature_set = fstore.get_feature_set(
+        endpoint_record[
+            mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
+        ]
+    )
+    for name, val in zip(feature_names, feature_values):
+        monitoring_feature_set.add_feature(
+            fstore.Feature(name=name, value_type=type(val))
+        )
+
+    monitoring_feature_set.save()
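The new module-level `update_monitoring_feature_set` helper (called from the two `MapFeatureNames.do` sites above) registers newly observed feature and label columns on the endpoint's monitoring feature set, inferring each `value_type` from the Python type of the sample value, then persists the set. A hedged usage sketch; the endpoint record and column values here are invented, and in the pipeline the record comes from `get_endpoint_record`:

import mlrun.common.schemas.model_monitoring

# Hypothetical endpoint record; only the feature-set URI field is needed here.
endpoint_record = {
    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI: (
        "store://feature-sets/my-project/monitoring-fs"
    )
}

# Registers sepal_length/sepal_width, inferring value types from the sample
# values 5.1 and 3.5, and saves the updated feature set.
update_monitoring_feature_set(
    endpoint_record=endpoint_record,
    feature_names=["sepal_length", "sepal_width"],
    feature_values=[5.1, 3.5],
)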
@@ -838,9 +838,9 @@ class PandasDataFramePackager(DefaultPackager):
         """
         if isinstance(obj, dict):
             for key, value in obj.items():
-                obj[
-                    PandasDataFramePackager._prepare_result(obj=key)
-                ] = PandasDataFramePackager._prepare_result(obj=value)
+                obj[PandasDataFramePackager._prepare_result(obj=key)] = (
+                    PandasDataFramePackager._prepare_result(obj=value)
+                )
         elif isinstance(obj, list):
             for i, value in enumerate(obj):
                 obj[i] = PandasDataFramePackager._prepare_result(obj=value)
@@ -179,7 +179,9 @@ class _TarArchiver(_Archiver):
 
         # Extract:
         with tarfile.open(archive_path, f"r:{cls._MODE_STRING}") as tar_file:
-            tar_file.extractall(directory_path)
+            # use 'data' to ensure no security risks are imposed by the archive files
+            # see: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
+            tar_file.extractall(directory_path, filter="data")
 
         return str(directory_path)
 
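`filter="data"` makes `extractall` reject archive members that could escape the destination: absolute paths, `..` traversal, device nodes, and links pointing outside the tree. It is available on Python 3.12+ and was backported to maintenance releases of 3.8-3.11. A standalone sketch of the traversal case, under those version assumptions:

import io
import tarfile
import tempfile

# Build an in-memory archive containing a path-traversal member.
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
    info = tarfile.TarInfo(name="../escape.txt")
    payload = b"oops"
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))
buf.seek(0)

with tarfile.open(fileobj=buf, mode="r:gz") as tar:
    try:
        # The 'data' filter raises before anything is written outside the target.
        tar.extractall(tempfile.mkdtemp(), filter="data")
    except tarfile.OutsideDestinationError as exc:
        print(f"blocked unsafe member: {exc}")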
@@ -16,19 +16,15 @@ import json
 import os
 import urllib
 from collections import namedtuple
-from datetime import datetime
-from http import HTTPStatus
 from urllib.parse import urlparse
 
 import kfp.dsl
 import requests
 import semver
-import urllib3
 import v3io
 
 import mlrun.errors
 from mlrun.config import config as mlconf
-from mlrun.errors import err_to_str
 from mlrun.utils import dict_to_json
 
 _cached_control_session = None
@@ -488,25 +484,6 @@ class V3ioStreamClient:
         return response.output.records
 
 
-def create_control_session(url, username, password):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    if not username or not password:
-        raise ValueError("cannot create session key, missing username or password")
-
-    session = requests.Session()
-    session.auth = (username, password)
-    try:
-        auth = session.post(f"{url}/api/sessions", verify=False)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    if not auth.ok:
-        raise OSError(f"failed to create session: {url}, {auth.text}")
-
-    return auth.json()["data"]["id"]
-
-
 def is_iguazio_endpoint(endpoint_url: str) -> bool:
     # TODO: find a better heuristic
     return ".default-tenant." in endpoint_url
@@ -533,21 +510,6 @@ def is_iguazio_session_cookie(session_cookie: str) -> bool:
     return False
 
 
-def is_iguazio_system_2_10_or_above(dashboard_url):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    response = requests.get(f"{dashboard_url}/api/external_versions", verify=False)
-
-    if not response.ok:
-        if response.status_code == HTTPStatus.NOT_FOUND.value:
-            # in iguazio systems prior to 2.10 this endpoint didn't exist, so the api returns 404 cause endpoint not
-            # found
-            return False
-        response.raise_for_status()
-
-    return True
-
-
 # we assign the control session or access key to the password since this is iguazio auth scheme
 # (requests should be sent with username:control_session/access_key as auth header)
 def add_or_refresh_credentials(
  def add_or_refresh_credentials(
@@ -577,33 +539,12 @@ def add_or_refresh_credentials(
577
539
  # (ideally if we could identify we're in enterprise we would have verify here that token and username have value)
578
540
  if not is_iguazio_endpoint(api_url):
579
541
  return "", "", token
580
- iguazio_dashboard_url = "https://dashboard" + api_url[api_url.find(".") :]
581
-
582
- # in 2.8 mlrun api is protected with control session, from 2.10 it's protected with access key
583
- is_access_key_auth = is_iguazio_system_2_10_or_above(iguazio_dashboard_url)
584
- if is_access_key_auth:
585
- if not username or not token:
586
- raise ValueError(
587
- "username and access key required to authenticate against iguazio system"
588
- )
589
- return username, token, ""
590
-
591
- if not username or not password:
592
- raise ValueError("username and password needed to create session")
593
-
594
- global _cached_control_session
595
- now = datetime.now()
596
- if _cached_control_session:
597
- if (
598
- _cached_control_session[2] == username
599
- and _cached_control_session[3] == password
600
- and (now - _cached_control_session[1]).seconds < 20 * 60 * 60
601
- ):
602
- return _cached_control_session[2], _cached_control_session[0], ""
603
-
604
- control_session = create_control_session(iguazio_dashboard_url, username, password)
605
- _cached_control_session = (control_session, now, username, password)
606
- return username, control_session, ""
542
+
543
+ if not username or not token:
544
+ raise ValueError(
545
+ "username and access key required to authenticate against iguazio system"
546
+ )
547
+ return username, token, ""
607
548
 
608
549
 
609
550
  def parse_path(url, suffix="/"):
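With the 2.8-era control-session path deleted (along with `create_control_session`, `is_iguazio_system_2_10_or_above`, and the module-level cache), authenticating against an Iguazio-hosted API now requires a username plus access key. A sketch of the simplified contract, assuming the existing `add_or_refresh_credentials(api_url, username, password, token)` signature and using made-up values:

# Non-Iguazio endpoint: credentials pass through untouched.
assert add_or_refresh_credentials(
    "https://mlrun.example.com", token="some-token"
) == ("", "", "some-token")

# Iguazio endpoint: username + access key are returned as-is; no control
# session is minted anymore, and missing either value raises ValueError.
username, token, _ = add_or_refresh_credentials(
    "https://mlrun-api.default-tenant.app.example.com",
    username="admin",
    token="my-access-key",
)
assert (username, token) == ("admin", "my-access-key")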
@@ -13,6 +13,7 @@
 # limitations under the License.
 import abc
 import builtins
+import http
 import importlib.util as imputil
 import os
 import tempfile
@@ -69,16 +70,16 @@ class WorkflowSpec(mlrun.model.ModelObj):
 
     def __init__(
         self,
-        engine=None,
-        code=None,
-        path=None,
-        args=None,
-        name=None,
-        handler=None,
-        args_schema: dict = None,
+        engine: typing.Optional[str] = None,
+        code: typing.Optional[str] = None,
+        path: typing.Optional[str] = None,
+        args: typing.Optional[dict] = None,
+        name: typing.Optional[str] = None,
+        handler: typing.Optional[str] = None,
+        args_schema: typing.Optional[dict] = None,
         schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
-        cleanup_ttl: int = None,
-        image: str = None,
+        cleanup_ttl: typing.Optional[int] = None,
+        image: typing.Optional[str] = None,
     ):
         self.engine = engine
         self.code = code
@@ -401,6 +402,9 @@ def enrich_function_object(
         else:
             f.spec.build.source = project.spec.source
             f.spec.build.load_source_on_run = project.spec.load_source_on_run
+            f.spec.build.source_code_target_dir = (
+                project.spec.build.source_code_target_dir
+            )
         f.spec.workdir = project.spec.workdir or project.spec.subpath
         f.prepare_image_for_deploy()
 
@@ -605,6 +609,7 @@ class _KFPRunner(_PipelineRunner):
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )
 
         # The user provided workflow code might have made changes to function specs that require cleanup
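`_KFPRunner` now caps the KFP client wait with a configurable timeout drawn from the `workflows.timeouts.kfp` entry of mlrun's config object, so it can be tuned per deployment. A brief hedged sketch (the override value is arbitrary):

import mlrun

# Inspect the engine-specific timeout the runner will pass to the KFP client.
print(mlrun.mlconf.workflows.timeouts.kfp)

# Raise it for long-queuing pipelines before calling project.run(...).
# The runner casts with int(), so numeric strings from env overrides work too.
mlrun.mlconf.workflows.timeouts.kfp = 120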
@@ -862,17 +867,44 @@ class _RemoteRunner(_PipelineRunner):
             )
             return
 
+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
+        logger.debug(
+            "Workflow submitted, waiting for pipeline run to start",
+            workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
+        )
+
+        def _get_workflow_id_or_bail():
+            try:
+                return run_db.get_workflow_id(
+                    project=project.name,
+                    name=workflow_response.name,
+                    run_id=workflow_response.run_id,
+                    engine=workflow_spec.engine,
+                )
+            except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
+                # fail fast on specific errors
+                if get_wf_exc.error_status_code in [
+                    http.HTTPStatus.PRECONDITION_FAILED
+                ]:
+                    raise mlrun.errors.MLRunFatalFailureError(
+                        original_exception=get_wf_exc
+                    )
+
+                # raise for a retry (on other errors)
+                raise
+
         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-            getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine),
+            get_workflow_id_timeout,
             logger,
             False,
-            run_db.get_workflow_id,
-            project=project.name,
-            name=workflow_response.name,
-            run_id=workflow_response.run_id,
-            engine=workflow_spec.engine,
+            _get_workflow_id_or_bail,
         )
         workflow_id = response.workflow_id
         # After fetching the workflow_id the workflow executed successfully
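`retry_until_successful` keeps retrying the wrapped callable until its timeout expires; the new `_get_workflow_id_or_bail` closure converts a 412 Precondition Failed into `MLRunFatalFailureError` so the loop aborts immediately instead of burning the whole timeout. A self-contained stand-in for that pattern, assuming only the fail-fast contract (the real helpers live in `mlrun.utils` and `mlrun.errors`):

import time


class FatalFailure(Exception):
    """Stand-in for MLRunFatalFailureError: aborts the retry loop at once."""

    def __init__(self, original_exception):
        self.original_exception = original_exception


def retry_until_successful(backoff, timeout, function):
    # Minimal sketch of the retry loop: retry on any error until the deadline,
    # except FatalFailure, whose wrapped exception is re-raised immediately.
    deadline = time.monotonic() + timeout
    while True:
        try:
            return function()
        except FatalFailure as fatal:
            raise fatal.original_exception
        except Exception:
            if time.monotonic() >= deadline:
                raise
            time.sleep(backoff)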
@@ -988,6 +1020,7 @@ def load_and_run(
     cleanup_ttl: int = None,
     load_only: bool = False,
     wait_for_completion: bool = False,
+    project_context: str = None,
 ):
     """
     Auxiliary function that the RemoteRunner run once or run every schedule.
@@ -1018,10 +1051,11 @@ def load_and_run(
                                 workflow and all its resources are deleted)
    :param load_only:            for just loading the project, inner use.
    :param wait_for_completion:  wait for workflow completion before returning
+   :param project_context:      project context path (used for loading the project)
    """
    try:
        project = mlrun.load_project(
-            context=f"./{project_name}",
+            context=project_context or f"./{project_name}",
            url=url,
            name=project_name,
            init_git=init_git,
@@ -1053,7 +1087,7 @@ def load_and_run(
 
         raise error
 
-    context.logger.info(f"Loaded project {project.name} from remote successfully")
+    context.logger.info(f"Loaded project {project.name} successfully")
 
     if load_only:
         return