PyPI - mlrun - Versions diffs - 1.7.0rc2__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl - Mend

mlrun 1.7.0rc2__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (70)

mlrun/artifacts/manager.py +6 -1
mlrun/common/constants.py +1 -0
mlrun/common/model_monitoring/helpers.py +12 -6
mlrun/common/schemas/__init__.py +1 -0
mlrun/common/schemas/client_spec.py +1 -0
mlrun/common/schemas/common.py +40 -0
mlrun/common/schemas/model_monitoring/constants.py +4 -1
mlrun/common/schemas/project.py +2 -0
mlrun/config.py +20 -16
mlrun/datastore/azure_blob.py +22 -9
mlrun/datastore/base.py +15 -25
mlrun/datastore/datastore.py +19 -8
mlrun/datastore/datastore_profile.py +47 -5
mlrun/datastore/google_cloud_storage.py +10 -6
mlrun/datastore/hdfs.py +51 -0
mlrun/datastore/redis.py +4 -0
mlrun/datastore/s3.py +4 -0
mlrun/datastore/sources.py +31 -50
mlrun/datastore/targets.py +58 -48
mlrun/datastore/utils.py +2 -49
mlrun/datastore/v3io.py +4 -0
mlrun/db/base.py +34 -0
mlrun/db/httpdb.py +71 -42
mlrun/execution.py +3 -3
mlrun/feature_store/feature_vector.py +2 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
mlrun/frameworks/tf_keras/model_handler.py +7 -7
mlrun/k8s_utils.py +10 -5
mlrun/kfpops.py +19 -10
mlrun/model.py +5 -0
mlrun/model_monitoring/api.py +3 -3
mlrun/model_monitoring/application.py +1 -1
mlrun/model_monitoring/applications/__init__.py +13 -0
mlrun/model_monitoring/applications/histogram_data_drift.py +218 -0
mlrun/model_monitoring/batch.py +9 -111
mlrun/model_monitoring/controller.py +73 -55
mlrun/model_monitoring/controller_handler.py +13 -5
mlrun/model_monitoring/features_drift_table.py +62 -53
mlrun/model_monitoring/helpers.py +30 -21
mlrun/model_monitoring/metrics/__init__.py +13 -0
mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +14 -14
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
mlrun/package/packagers/pandas_packagers.py +3 -3
mlrun/package/utils/_archiver.py +3 -1
mlrun/platforms/iguazio.py +8 -65
mlrun/projects/pipelines.py +21 -11
mlrun/projects/project.py +121 -42
mlrun/runtimes/base.py +21 -2
mlrun/runtimes/kubejob.py +5 -3
mlrun/runtimes/local.py +2 -2
mlrun/runtimes/mpijob/abstract.py +6 -6
mlrun/runtimes/nuclio/function.py +9 -9
mlrun/runtimes/nuclio/serving.py +3 -3
mlrun/runtimes/pod.py +3 -3
mlrun/runtimes/sparkjob/spark3job.py +3 -3
mlrun/serving/remote.py +4 -2
mlrun/serving/server.py +15 -18
mlrun/serving/states.py +27 -12
mlrun/utils/async_http.py +3 -3
mlrun/utils/helpers.py +27 -5
mlrun/utils/http.py +3 -3
mlrun/utils/notifications/notification_pusher.py +6 -6
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/METADATA +13 -16
{mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/RECORD +70 -64
{mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/LICENSE +0 -0
{mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/WHEEL +0 -0
{mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/top_level.txt +0 -0

mlrun/serving/server.py CHANGED Viewed

@@ -188,11 +188,6 @@ class GraphServer(ModelObj):
     def init_object(self, namespace):
         self.graph.init_object(self.context, namespace, self.load_mode, reset=True)
-        return (
-            v2_serving_async_handler
-            if config.datastore.async_source_mode == "enabled"
-            else v2_serving_handler
-        )
     def test(
         self,
@@ -310,7 +305,7 @@ class GraphServer(ModelObj):
     def wait_for_completion(self):
         """wait for async operation to complete"""
-        self.graph.wait_for_completion()
+        return self.graph.wait_for_completion()
 def v2_serving_init(context, namespace=None):
@@ -334,11 +329,18 @@ def v2_serving_init(context, namespace=None):
     context.logger.info_with(
         "Initializing states", namespace=namespace or get_caller_globals()
     )
-    server.init_states(context, namespace or get_caller_globals())
+    kwargs = {}
+    if hasattr(context, "is_mock"):
+        kwargs["is_mock"] = context.is_mock
+    server.init_states(
+        context,
+        namespace or get_caller_globals(),
+        **kwargs,
+    )
     context.logger.info("Initializing graph steps")
-    serving_handler = server.init_object(namespace or get_caller_globals())
+    server.init_object(namespace or get_caller_globals())
     # set the handler hook to point to our handler
-    setattr(context, "mlrun_handler", serving_handler)
+    setattr(context, "mlrun_handler", v2_serving_handler)
     setattr(context, "_server", server)
     context.logger.info_with("Serving was initialized", verbose=server.verbose)
     if server.verbose:
@@ -351,9 +353,9 @@ def v2_serving_init(context, namespace=None):
             "Setting termination callback to terminate graph on worker shutdown"
         )
-        def termination_callback():
+        async def termination_callback():
             context.logger.info("Termination callback called")
-            server.wait_for_completion()
+            await server.wait_for_completion()
             context.logger.info("Termination of async flow is completed")
         context.platform.set_termination_callback(termination_callback)
@@ -363,9 +365,9 @@ def v2_serving_init(context, namespace=None):
             "Setting drain callback to terminate and restart the graph on a drain event (such as rebalancing)"
         )
-        def drain_callback():
+        async def drain_callback():
             context.logger.info("Drain callback called")
-            server.wait_for_completion()
+            await server.wait_for_completion()
             context.logger.info(
                 "Termination of async flow is completed. Rerunning async flow."
             )
@@ -386,11 +388,6 @@ def v2_serving_handler(context, event, get_body=False):
     return context._server.run(event, context, get_body)
-async def v2_serving_async_handler(context, event, get_body=False):
-    """hook for nuclio handler()"""
-    return await context._server.run(event, context, get_body)
 def create_graph_server(
     parameters={},
     load_mode=None,

mlrun/serving/states.py CHANGED Viewed

@@ -14,6 +14,7 @@
 __all__ = ["TaskStep", "RouterStep", "RootFlowStep", "ErrorStep"]
+import asyncio
 import os
 import pathlib
 import traceback
@@ -1160,19 +1161,19 @@ class FlowStep(BaseStep):
         if self._controller:
             # async flow (using storey)
             event._awaitable_result = None
-            if config.datastore.async_source_mode == "enabled":
+            if self.context.is_mock:
+                resp = self._controller.emit(
+                    event, return_awaitable_result=self._wait_for_result
+                )
+                if self._wait_for_result and resp:
+                    return resp.await_result()
+            else:
                 resp_awaitable = self._controller.emit(
                     event, await_result=self._wait_for_result
                 )
                 if self._wait_for_result:
                     return resp_awaitable
                 return self._await_and_return_id(resp_awaitable, event)
-            else:
-                resp = self._controller.emit(
-                    event, return_awaitable_result=self._wait_for_result
-                )
-                if self._wait_for_result and resp:
-                    return resp.await_result()
             event = copy(event)
             event.body = {"id": event.id}
             return event
@@ -1210,10 +1211,20 @@ class FlowStep(BaseStep):
     def wait_for_completion(self):
         """wait for completion of run in async flows"""
         if self._controller:
-            if hasattr(self._controller, "terminate"):
-                self._controller.terminate()
-            return self._controller.await_termination()
+            if asyncio.iscoroutinefunction(self._controller.await_termination):
+                async def terminate_and_await_termination():
+                    if hasattr(self._controller, "terminate"):
+                        await self._controller.terminate()
+                    return await self._controller.await_termination()
+                return terminate_and_await_termination()
+            else:
+                if hasattr(self._controller, "terminate"):
+                    self._controller.terminate()
+                return self._controller.await_termination()
     def plot(self, filename=None, format=None, source=None, targets=None, **kw):
         """plot/save graph using graphviz
@@ -1555,10 +1566,14 @@ def _init_async_objects(context, steps):
                 wait_for_result = True
     source_args = context.get_param("source_args", {})
     explicit_ack = is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
-    default_source = storey.SyncEmitSource(
+    if context.is_mock:
+        source_class = storey.SyncEmitSource
+    else:
+        source_class = storey.AsyncEmitSource
+    default_source = source_class(
         context=context,
         explicit_ack=explicit_ack,
         **source_args,

mlrun/utils/async_http.py CHANGED Viewed

@@ -137,9 +137,9 @@ class _CustomRequestContext(_RequestContext):
                 # enrich user agent
                 # will help traceability and debugging
-                headers[
-                    aiohttp.hdrs.USER_AGENT
-                ] = f"{aiohttp.http.SERVER_SOFTWARE} mlrun/{config.version}"
+                headers[aiohttp.hdrs.USER_AGENT] = (
+                    f"{aiohttp.http.SERVER_SOFTWARE} mlrun/{config.version}"
+                )
                 response: typing.Optional[
                     aiohttp.ClientResponse

mlrun/utils/helpers.py CHANGED Viewed

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import asyncio
 import enum
 import functools
 import hashlib
@@ -30,7 +31,6 @@ from os import path
 from types import ModuleType
 from typing import Any, Optional
-import anyio
 import git
 import inflection
 import numpy as np
@@ -49,6 +49,7 @@ import mlrun.common.schemas
 import mlrun.errors
 import mlrun.utils.regex
 import mlrun.utils.version.version
+from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
 from mlrun.config import config
 from .logger import create_logger
@@ -270,6 +271,17 @@ def validate_artifact_key_name(
     )
+def validate_inline_artifact_body_size(body: typing.Union[str, bytes, None]) -> None:
+    if body and len(body) > MYSQL_MEDIUMBLOB_SIZE_BYTES:
+        raise mlrun.errors.MLRunBadRequestError(
+            "The body of the artifact exceeds the maximum allowed size. "
+            "Avoid embedding the artifact body. "
+            "This increases the size of the project yaml file and could affect the project during loading and saving. "
+            "More information is available at"
+            "https://docs.mlrun.org/en/latest/projects/automate-project-git-source.html#setting-and-registering-the-project-artifacts"
+        )
 def validate_v3io_stream_consumer_group(
     value: str, raise_on_failure: bool = True
 ) -> bool:
@@ -1464,13 +1476,15 @@ def normalize_project_username(username: str):
     return username
-# run_in threadpool is taken from fastapi to allow us to run sync functions in a threadpool
-# without importing fastapi in the client
 async def run_in_threadpool(func, *args, **kwargs):
+    """
+    Run a sync-function in the loop default thread pool executor pool and await its result.
+    Note that this function is not suitable for CPU-bound tasks, as it will block the event loop.
+    """
+    loop = asyncio.get_running_loop()
     if kwargs:
-        # run_sync doesn't accept 'kwargs', so bind them in here
         func = functools.partial(func, **kwargs)
-    return await anyio.to_thread.run_sync(func, *args)
+    return await loop.run_in_executor(None, func, *args)
 def is_explicit_ack_supported(context):
@@ -1540,3 +1554,11 @@ def get_local_file_schema() -> list:
     # The expression `list(string.ascii_lowercase)` generates a list of lowercase alphabets,
     # which corresponds to drive letters in Windows file paths such as `C:/Windows/path`.
     return ["file"] + list(string.ascii_lowercase)
+def is_safe_path(base, filepath, is_symlink=False):
+    # Avoid path traversal attacks by ensuring that the path is safe
+    resolved_filepath = (
+        os.path.abspath(filepath) if not is_symlink else os.path.realpath(filepath)
+    )
+    return base == os.path.commonpath((base, resolved_filepath))

mlrun/utils/http.py CHANGED Viewed

@@ -109,9 +109,9 @@ class HTTPSessionWithRetry(requests.Session):
     def request(self, method, url, **kwargs):
         retry_count = 0
         kwargs.setdefault("headers", {})
-        kwargs["headers"][
-            "User-Agent"
-        ] = f"{requests.utils.default_user_agent()} mlrun/{config.version}"
+        kwargs["headers"]["User-Agent"] = (
+            f"{requests.utils.default_user_agent()} mlrun/{config.version}"
+        )
         while True:
             try:
                 response = super().request(method, url, **kwargs)

mlrun/utils/notifications/notification_pusher.py CHANGED Viewed

@@ -303,9 +303,9 @@ class NotificationPusher(_NotificationPusherBase):
                 traceback=traceback.format_exc(),
             )
             update_notification_status_kwargs["reason"] = f"Exception error: {str(exc)}"
-            update_notification_status_kwargs[
-                "status"
-            ] = mlrun.common.schemas.NotificationStatus.ERROR
+            update_notification_status_kwargs["status"] = (
+                mlrun.common.schemas.NotificationStatus.ERROR
+            )
             raise exc
         finally:
             self._update_notification_status(
@@ -352,9 +352,9 @@ class NotificationPusher(_NotificationPusherBase):
                 traceback=traceback.format_exc(),
             )
             update_notification_status_kwargs["reason"] = f"Exception error: {str(exc)}"
-            update_notification_status_kwargs[
-                "status"
-            ] = mlrun.common.schemas.NotificationStatus.ERROR
+            update_notification_status_kwargs["status"] = (
+                mlrun.common.schemas.NotificationStatus.ERROR
+            )
             raise exc
         finally:
             await mlrun.utils.helpers.run_in_threadpool(

mlrun/utils/version/version.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "git_commit": "523bdf379b1183bee50b5b0ebe61ddeae9d7ca1c",
-  "version": "1.7.0-rc2"
+  "git_commit": "cb2750f25e202a321723af3465359944445dfda7",
+  "version": "1.7.0-rc4"
 }

{mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mlrun
-Version: 1.7.0rc2
+Version: 1.7.0rc4
 Summary: Tracking and config of machine learning runs
 Home-page: https://github.com/mlrun/mlrun
 Author: Yaron Haviv
@@ -44,13 +44,12 @@ Requires-Dist: semver ~=3.0
 Requires-Dist: dependency-injector ~=4.41
 Requires-Dist: fsspec ==2023.9.2
 Requires-Dist: v3iofs ~=0.1.17
-Requires-Dist: storey ~=1.7.3
+Requires-Dist: storey ~=1.7.5
 Requires-Dist: inflection ~=0.5.0
 Requires-Dist: python-dotenv ~=0.17.0
-Requires-Dist: setuptools ~=68.2
+Requires-Dist: setuptools ~=69.1
 Requires-Dist: deprecated ~=1.2
 Requires-Dist: jinja2 >=3.1.3,~=3.1
-Requires-Dist: anyio ~=3.7
 Requires-Dist: orjson ~=3.9
 Provides-Extra: all
 Requires-Dist: adlfs ==2023.9.0 ; extra == 'all'
@@ -80,12 +79,11 @@ Requires-Dist: sqlalchemy ~=1.4 ; extra == 'all'
 Provides-Extra: api
 Requires-Dist: uvicorn ~=0.27.1 ; extra == 'api'
 Requires-Dist: dask-kubernetes ~=0.11.0 ; extra == 'api'
-Requires-Dist: apscheduler !=3.10.2,~=3.6 ; extra == 'api'
-Requires-Dist: sqlite3-to-mysql ~=1.4 ; extra == 'api'
-Requires-Dist: objgraph ~=3.5 ; extra == 'api'
-Requires-Dist: igz-mgmt ~=0.0.10 ; extra == 'api'
-Requires-Dist: humanfriendly ~=9.2 ; extra == 'api'
-Requires-Dist: fastapi ~=0.103.2 ; extra == 'api'
+Requires-Dist: apscheduler <4,>=3.10.3 ; extra == 'api'
+Requires-Dist: objgraph ~=3.6 ; extra == 'api'
+Requires-Dist: igz-mgmt ~=0.1.0 ; extra == 'api'
+Requires-Dist: humanfriendly ~=10.0 ; extra == 'api'
+Requires-Dist: fastapi ~=0.110.0 ; extra == 'api'
 Requires-Dist: sqlalchemy ~=1.4 ; extra == 'api'
 Requires-Dist: pymysql ~=1.0 ; extra == 'api'
 Requires-Dist: alembic ~=1.9 ; extra == 'api'
@@ -127,7 +125,7 @@ Provides-Extra: complete-api
 Requires-Dist: adlfs ==2023.9.0 ; extra == 'complete-api'
 Requires-Dist: aiobotocore <2.8,>=2.5.0 ; extra == 'complete-api'
 Requires-Dist: alembic ~=1.9 ; extra == 'complete-api'
-Requires-Dist: apscheduler !=3.10.2,~=3.6 ; extra == 'complete-api'
+Requires-Dist: apscheduler <4,>=3.10.3 ; extra == 'complete-api'
 Requires-Dist: avro ~=1.11 ; extra == 'complete-api'
 Requires-Dist: azure-core ~=1.24 ; extra == 'complete-api'
 Requires-Dist: azure-identity ~=1.5 ; extra == 'complete-api'
@@ -137,23 +135,22 @@ Requires-Dist: dask-kubernetes ~=0.11.0 ; extra == 'complete-api'
 Requires-Dist: dask ~=2023.9.0 ; extra == 'complete-api'
 Requires-Dist: databricks-sdk ~=0.13.0 ; extra == 'complete-api'
 Requires-Dist: distributed ~=2023.9.0 ; extra == 'complete-api'
-Requires-Dist: fastapi ~=0.103.2 ; extra == 'complete-api'
+Requires-Dist: fastapi ~=0.110.0 ; extra == 'complete-api'
 Requires-Dist: gcsfs ==2023.9.2 ; extra == 'complete-api'
 Requires-Dist: google-cloud-bigquery[bqstorage,pandas] ==3.14.1 ; extra == 'complete-api'
 Requires-Dist: graphviz ~=0.20.0 ; extra == 'complete-api'
-Requires-Dist: humanfriendly ~=9.2 ; extra == 'complete-api'
-Requires-Dist: igz-mgmt ~=0.0.10 ; extra == 'complete-api'
+Requires-Dist: humanfriendly ~=10.0 ; extra == 'complete-api'
+Requires-Dist: igz-mgmt ~=0.1.0 ; extra == 'complete-api'
 Requires-Dist: kafka-python ~=2.0 ; extra == 'complete-api'
 Requires-Dist: mlflow ~=2.8 ; extra == 'complete-api'
 Requires-Dist: msrest ~=0.6.21 ; extra == 'complete-api'
-Requires-Dist: objgraph ~=3.5 ; extra == 'complete-api'
+Requires-Dist: objgraph ~=3.6 ; extra == 'complete-api'
 Requires-Dist: plotly <5.12.0,~=5.4 ; extra == 'complete-api'
 Requires-Dist: pymysql ~=1.0 ; extra == 'complete-api'
 Requires-Dist: pyopenssl >=23 ; extra == 'complete-api'
 Requires-Dist: redis ~=4.3 ; extra == 'complete-api'
 Requires-Dist: s3fs ==2023.9.2 ; extra == 'complete-api'
 Requires-Dist: sqlalchemy ~=1.4 ; extra == 'complete-api'
-Requires-Dist: sqlite3-to-mysql ~=1.4 ; extra == 'complete-api'
 Requires-Dist: timelength ~=1.1 ; extra == 'complete-api'
 Requires-Dist: uvicorn ~=0.27.1 ; extra == 'complete-api'
 Provides-Extra: dask

mlrun 1.7.0rc2__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.0rc2__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl