mlrun 1.6.2rc5__py3-none-any.whl → 1.6.2rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/db/sql_session.py +0 -3
- mlrun/common/model_monitoring/helpers.py +2 -4
- mlrun/common/schemas/__init__.py +0 -1
- mlrun/common/schemas/project.py +0 -2
- mlrun/config.py +11 -30
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +44 -22
- mlrun/datastore/google_cloud_storage.py +6 -6
- mlrun/datastore/v3io.py +46 -70
- mlrun/db/base.py +0 -18
- mlrun/db/httpdb.py +25 -28
- mlrun/execution.py +3 -3
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +5 -10
- mlrun/kfpops.py +10 -19
- mlrun/model.py +0 -5
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +1 -0
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +1 -3
- mlrun/platforms/iguazio.py +65 -6
- mlrun/projects/pipelines.py +11 -21
- mlrun/projects/project.py +46 -65
- mlrun/runtimes/base.py +1 -24
- mlrun/runtimes/function.py +9 -9
- mlrun/runtimes/kubejob.py +3 -5
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/serving.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +2 -4
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +0 -8
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +2 -2
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/METADATA +16 -14
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/RECORD +47 -48
- mlrun/common/schemas/common.py +0 -40
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/LICENSE +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/WHEEL +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/top_level.txt +0 -0
mlrun/common/db/sql_session.py
CHANGED
@@ -63,12 +63,9 @@ def _init_engine(dsn=None):
     max_overflow = config.httpdb.db.connections_pool_max_overflow
     if max_overflow is None:
         max_overflow = config.httpdb.max_workers
-
     kwargs = {
         "pool_size": pool_size,
         "max_overflow": max_overflow,
-        "pool_pre_ping": config.httpdb.db.connections_pool_pre_ping,
-        "pool_recycle": config.httpdb.db.connections_pool_recycle,
     }
     engine = create_engine(dsn, **kwargs)
     _engines[dsn] = engine
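Reviewer note: the two dropped keyword arguments are standard SQLAlchemy pool options. A minimal sketch of what they control (the DSN and pool sizes below are illustrative, not mlrun defaults):

from sqlalchemy import create_engine

# pool_pre_ping tests each pooled connection with a lightweight ping on checkout
# and replaces stale ones; pool_recycle closes connections older than the given
# number of seconds. rc6 no longer passes either option to create_engine.
engine = create_engine(
    "mysql+pymysql://user:password@db-host:3306/mlrun",  # illustrative DSN
    pool_size=8,
    max_overflow=8,
    pool_pre_ping=True,
    pool_recycle=60 * 60,
)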
mlrun/common/model_monitoring/helpers.py
CHANGED
@@ -82,15 +82,13 @@ def parse_monitoring_stream_path(
     if application_name is None:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                project=project
+                project=project
             )
         )
     else:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink_app.format(
-                project=project,
-                application_name=application_name,
-                namespace=mlrun.mlconf.namespace,
+                project=project, application_name=application_name
             )
         )
     return stream_uri
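Reviewer note: the namespace argument disappears here because the rc6 default sink URLs (see mlrun/config.py below) hard-code the mlrun namespace. A minimal sketch of the simplified call, with made-up project and application names:

# The template matches the rc6 default_http_sink_app value; the names are illustrative.
template = "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080"
stream_uri = template.format(project="fraud-demo", application_name="drift-app")
print(stream_uri)
# http://nuclio-fraud-demo-drift-app.mlrun.svc.cluster.local:8080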
mlrun/common/schemas/__init__.py
CHANGED
mlrun/common/schemas/project.py
CHANGED
@@ -19,7 +19,6 @@ import pydantic
 
 import mlrun.common.types
 
-from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 
@@ -86,7 +85,6 @@ class ProjectSpec(pydantic.BaseModel):
     desired_state: typing.Optional[ProjectDesiredState] = ProjectDesiredState.online
     custom_packagers: typing.Optional[typing.List[typing.Tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
-    build: typing.Optional[ImageBuilder] = None
 
     class Config:
         extra = pydantic.Extra.allow
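Reviewer note: ProjectSpec keeps extra = pydantic.Extra.allow, so payloads that still carry a build section are accepted, just no longer validated as an ImageBuilder. A minimal pydantic sketch of that behavior (a simplified stand-in, not the real ProjectSpec):

import typing

import pydantic


class ProjectSpecSketch(pydantic.BaseModel):
    # simplified stand-in for ProjectSpec
    default_image: typing.Optional[str] = None

    class Config:
        extra = pydantic.Extra.allow


# "build" is kept as an untyped extra field instead of being parsed into ImageBuilder.
spec = ProjectSpecSketch(default_image="mlrun/mlrun", build={"image": "x"})
print(spec.dict())  # {'default_image': 'mlrun/mlrun', 'build': {'image': 'x'}}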
mlrun/config.py
CHANGED
@@ -288,12 +288,6 @@ default_config = {
         "state": "online",
         "retry_api_call_on_exception": "enabled",
         "http_connection_timeout_keep_alive": 11,
-        # http client used by httpdb
-        "http": {
-            # when True, the client will verify the server's TLS
-            # set to False for backwards compatibility.
-            "verify": False,
-        },
         "db": {
             "commit_retry_timeout": 30,
             "commit_retry_interval": 3,
@@ -312,11 +306,7 @@ default_config = {
                 # default is 16MB, max 1G, for more info https://dev.mysql.com/doc/refman/8.0/en/packet-too-large.html
                 "max_allowed_packet": 64000000,  # 64MB
             },
-            #
-            "connections_pool_pre_ping": True,
-            # this setting causes the pool to recycle connections after the given number of seconds has passed
-            "connections_pool_recycle": 60 * 60,
-            # None defaults to httpdb.max_workers
+            # None will set this to be equal to the httpdb.max_workers
             "connections_pool_size": None,
             "connections_pool_max_overflow": None,
             # below is a db-specific configuration
@@ -444,7 +434,7 @@ default_config = {
            # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
            # git+https://github.com/mlrun/mlrun@development. by default uses the version
            "mlrun_version_specifier": "",
-            "kaniko_image": "gcr.io/kaniko-project/executor:v1.
+            "kaniko_image": "gcr.io/kaniko-project/executor:v1.8.0",  # kaniko builder image
            "kaniko_init_container_image": "alpine:3.18",
            # image for kaniko init container when docker registry is ECR
            "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
@@ -491,8 +481,8 @@ default_config = {
        "offline_storage_path": "model-endpoints/{kind}",
        # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
        # when the user is working in CE environment and has not provided any stream path.
-        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.
-        "default_http_sink_app": "http://nuclio-{project}-{application_name}.
+        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
+        "default_http_sink_app": "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080",
        "batch_processing_function_branch": "master",
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
@@ -612,7 +602,7 @@ default_config = {
    "workflows": {
        "default_workflow_runner_name": "workflow-runner-{}",
        # Default timeout seconds for retrieving workflow id after execution:
-        "timeouts": {"local": 120, "kfp": 30, "remote":
+        "timeouts": {"local": 120, "kfp": 30, "remote": 30},
    },
    "log_collector": {
        "address": "localhost:8282",
@@ -964,10 +954,10 @@ class Config:
            with_gpu = (
                with_gpu_requests if requirement == "requests" else with_gpu_limits
            )
-            resources[
-
-
-
+            resources[
+                requirement
+            ] = self.get_default_function_pod_requirement_resources(
+                requirement, with_gpu
            )
        return resources

@@ -1350,21 +1340,12 @@ def read_env(env=None, prefix=env_prefix):
    if igz_domain:
        config["ui_url"] = f"https://mlrun-ui.{igz_domain}"

-    if
+    if config.get("log_level"):
        import mlrun.utils.logger

        # logger created (because of imports mess) before the config is loaded (in tests), therefore we're changing its
        # level manually
-        mlrun.utils.logger.set_logger_level(log_level)
-
-    if log_formatter_name := config.get("log_formatter"):
-        import mlrun.utils.logger
-
-        log_formatter = mlrun.utils.create_formatter_instance(
-            mlrun.utils.FormatterKinds(log_formatter_name)
-        )
-        mlrun.utils.logger.get_handler("default").setFormatter(log_formatter)
-
+        mlrun.utils.logger.set_logger_level(config["log_level"])
    # The default function pod resource values are of type str; however, when reading from environment variable numbers,
    # it converts them to type int if contains only number, so we want to convert them to str.
    _convert_resources_to_str(config)
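Reviewer note, stated as an assumption rather than something this diff shows: mlrun's read_env() conventionally maps MLRUN_-prefixed environment variables onto the nested default_config with a double-underscore separator, and the builder settings live under httpdb.builder. If both hold for this release, the new kaniko default could be overridden without code changes:

import os

# Assumption: "MLRUN_" prefix plus "__" nesting maps onto default_config; the
# variable name and executor tag below are illustrative only.
os.environ["MLRUN_HTTPDB__BUILDER__KANIKO_IMAGE"] = (
    "gcr.io/kaniko-project/executor:v1.9.1"
)

import mlrun

print(mlrun.mlconf.httpdb.builder.kaniko_image)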
mlrun/datastore/azure_blob.py
CHANGED
@@ -175,9 +175,9 @@ class AzureBlobStore(DataStore):
 
         if "client_secret" in st or "client_id" in st or "tenant_id" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[
-                "
-
+            res[
+                f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"
+            ] = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
             if "client_id" in st:
                 res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
                     "client_id"
@@ -188,14 +188,14 @@ class AzureBlobStore(DataStore):
                 ]
             if "tenant_id" in st:
                 tenant_id = st["tenant_id"]
-                res[
-                    f"
-
+                res[
+                    f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"
+                ] = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
 
         if "sas_token" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[
-                "
-
+            res[
+                f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"
+            ] = "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
             res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
         return res
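Reviewer note: this hunk only reformats how the per-account Spark/Hadoop ABFS options are assembled; the keys and values are unchanged. For orientation, a hedged sketch of how such a dict is typically applied to a Spark session (the account host and token are placeholders, and this is not mlrun code):

from pyspark.sql import SparkSession

spark_options = {
    "spark.hadoop.fs.azure.account.auth.type.myaccount.dfs.core.windows.net": "SAS",
    "spark.hadoop.fs.azure.sas.token.provider.type.myaccount.dfs.core.windows.net": "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider",
    "spark.hadoop.fs.azure.sas.fixed.token.myaccount.dfs.core.windows.net": "<sas-token>",
}

builder = SparkSession.builder.appName("abfs-example")
for key, value in spark_options.items():
    builder = builder.config(key, value)
spark = builder.getOrCreate()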
mlrun/datastore/base.py
CHANGED
@@ -27,7 +27,6 @@ import requests
 import urllib3
 from deprecated import deprecated
 
-import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -35,6 +34,10 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df
 
+verify_ssl = False
+if not verify_ssl:
+    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
 
 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -640,6 +643,45 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}
 
 
+def http_get(url, headers=None, auth=None):
+    try:
+        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+    except OSError as exc:
+        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+    mlrun.errors.raise_for_status(response)
+
+    return response.content
+
+
+def http_head(url, headers=None, auth=None):
+    try:
+        response = requests.head(url, headers=headers, auth=auth, verify=verify_ssl)
+    except OSError as exc:
+        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+    mlrun.errors.raise_for_status(response)
+
+    return response.headers
+
+
+def http_put(url, data, headers=None, auth=None, session=None):
+    try:
+        put_api = session.put if session else requests.put
+        response = put_api(
+            url, data=data, headers=headers, auth=auth, verify=verify_ssl
+        )
+    except OSError as exc:
+        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}") from exc
+
+    mlrun.errors.raise_for_status(response)
+
+
+def http_upload(url, file_path, headers=None, auth=None):
+    with open(file_path, "rb") as data:
+        http_put(url, data, headers, auth)
+
+
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
@@ -667,7 +709,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")
 
     def get(self, key, size=None, offset=0):
-        data =
+        data = http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -687,26 +729,6 @@ class HttpStore(DataStore):
             f"schema as it is not secure and is not recommended."
         )
 
-    def _http_get(
-        self,
-        url,
-        headers=None,
-        auth=None,
-    ):
-        # import here to prevent import cycle
-        from mlrun.config import config as mlconf
-
-        verify_ssl = mlconf.httpdb.http.verify
-        try:
-            if not verify_ssl:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-        except OSError as exc:
-            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-        mlrun.errors.raise_for_status(response)
-        return response.content
-
 
 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
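Reviewer note: the net effect of this change is that the per-instance HttpStore._http_get helper becomes a set of module-level functions (http_get, http_head, http_put, http_upload), with TLS verification controlled by the module-level verify_ssl flag, which rc6 hard-codes to False. A minimal usage sketch based on the signatures shown above (the URL and credentials are illustrative):

from mlrun.datastore.base import basic_auth_header, http_get, http_head, http_put

headers = basic_auth_header("user", "password")
url = "https://example.com/artifacts/data.csv"  # illustrative endpoint

meta = http_head(url, headers=headers)    # returns the response headers
size = int(meta.get("Content-Length", "0"))

content = http_get(url, headers=headers)  # returns the response body as bytes
http_put("https://example.com/artifacts/copy.csv", content, headers=headers)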
mlrun/datastore/google_cloud_storage.py
CHANGED
@@ -147,13 +147,13 @@ class GoogleCloudStorageStore(DataStore):
         if "project_id" in credentials:
             res["spark.hadoop.fs.gs.project.id"] = credentials["project_id"]
         if "private_key_id" in credentials:
-            res[
-
-
+            res[
+                "spark.hadoop.fs.gs.auth.service.account.private.key.id"
+            ] = credentials["private_key_id"]
         if "private_key" in credentials:
-            res[
-
-
+            res[
+                "spark.hadoop.fs.gs.auth.service.account.private.key"
+            ] = credentials["private_key"]
         if "client_email" in credentials:
             res["spark.hadoop.fs.gs.auth.service.account.email"] = credentials[
                 "client_email"
mlrun/datastore/v3io.py
CHANGED
@@ -15,11 +15,12 @@
 import mmap
 import os
 import time
+from copy import deepcopy
 from datetime import datetime
 
 import fsspec
-import
-
+import requests
+import v3io.dataplane
 
 import mlrun
 from mlrun.datastore.helpers import ONE_GB, ONE_MB
@@ -29,6 +30,11 @@ from .base import (
     DataStore,
     FileStats,
     basic_auth_header,
+    get_range,
+    http_get,
+    http_head,
+    http_put,
+    http_upload,
 )
 
 V3IO_LOCAL_ROOT = "v3io"
@@ -41,18 +47,17 @@ class V3ioStore(DataStore):
 
         self.headers = None
         self.secure = self.kind == "v3ios"
-
-        token = self._get_secret_or_env("V3IO_ACCESS_KEY")
-        username = self._get_secret_or_env("V3IO_USERNAME")
-        password = self._get_secret_or_env("V3IO_PASSWORD")
         if self.endpoint.startswith("https://"):
             self.endpoint = self.endpoint[len("https://") :]
             self.secure = True
         elif self.endpoint.startswith("http://"):
             self.endpoint = self.endpoint[len("http://") :]
             self.secure = False
-
-
+
+        token = self._get_secret_or_env("V3IO_ACCESS_KEY")
+        username = self._get_secret_or_env("V3IO_USERNAME")
+        password = self._get_secret_or_env("V3IO_PASSWORD")
+
         self.auth = None
         self.token = token
         if token:
@@ -60,16 +65,6 @@ class V3ioStore(DataStore):
         elif username and password:
             self.headers = basic_auth_header(username, password)
 
-    @staticmethod
-    def _do_object_request(function: callable, *args, **kwargs):
-        try:
-            return function(*args, **kwargs)
-        except HttpResponseError as http_response_error:
-            raise mlrun.errors.err_for_status_code(
-                status_code=http_response_error.status_code,
-                message=mlrun.errors.err_to_str(http_response_error),
-            )
-
     @staticmethod
     def uri_to_ipython(endpoint, subpath):
         return V3IO_LOCAL_ROOT + subpath
@@ -96,19 +91,13 @@ class V3ioStore(DataStore):
 
     def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
         """helper function for upload method, allows for controlling max_chunk_size in testing"""
-        container, path = split_path(self._join(key))
         file_size = os.path.getsize(src_path)  # in bytes
         if file_size <= ONE_MB:
-
-            data = source_file.read()
-            self._do_object_request(
-                self.object.put,
-                container=container,
-                path=path,
-                body=data,
-                append=False,
-            )
+            http_upload(self.url + self._join(key), src_path, self.headers, None)
             return
+        append_header = deepcopy(self.headers)
+        append_header["Range"] = "-1"
+
         # chunk must be a multiple of the ALLOCATIONGRANULARITY
         # https://docs.python.org/3/library/mmap.html
         if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
@@ -125,13 +114,11 @@ class V3ioStore(DataStore):
                 access=mmap.ACCESS_READ,
                 offset=file_offset,
             ) as mmap_obj:
-
-
-
-
-
-                    body=mmap_obj,
-                    append=append,
+                http_put(
+                    self.url + self._join(key),
+                    mmap_obj,
+                    append_header if file_offset else self.headers,
+                    None,
                 )
                 file_offset += chunk_size
 
@@ -139,55 +126,43 @@ class V3ioStore(DataStore):
         return self._upload(key, src_path)
 
     def get(self, key, size=None, offset=0):
-
-
-
-
-
-            offset=offset,
-            num_bytes=size,
-        ).body
+        headers = self.headers
+        if size or offset:
+            headers = deepcopy(headers)
+            headers["Range"] = get_range(size, offset)
+        return http_get(self.url + self._join(key), headers)
 
-    def _put(self, key, data,
+    def _put(self, key, data, max_chunk_size: int = ONE_GB):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
-        container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         if buffer_size <= ONE_MB:
-            self.
-                self.object.put,
-                container=container,
-                path=path,
-                body=data,
-                append=append,
-            )
+            http_put(self.url + self._join(key), data, self.headers, None)
             return
+        append_header = deepcopy(self.headers)
+        append_header["Range"] = "-1"
         buffer_offset = 0
         try:
             data = memoryview(data)
         except TypeError:
             pass
 
-
-
-
-
-
-
-
-
-
-
-
+        with requests.Session() as requests_session:
+            while buffer_offset < buffer_size:
+                chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
+                http_put(
+                    self.url + self._join(key),
+                    data[buffer_offset : buffer_offset + chunk_size],
+                    append_header if buffer_offset else self.headers,
+                    None,
+                    requests_session,
+                )
+                buffer_offset += chunk_size
 
     def put(self, key, data, append=False):
-        return self._put(key, data
+        return self._put(key, data)
 
     def stat(self, key):
-
-        response = self._do_object_request(
-            function=self.object.head, container=container, path=path
-        )
-        head = dict(response.headers)
+        head = http_head(self.url + self._join(key), self.headers)
         size = int(head.get("Content-Length", "0"))
         datestr = head.get("Last-Modified", "0")
         modified = time.mktime(
@@ -196,6 +171,7 @@ class V3ioStore(DataStore):
         return FileStats(size, modified)
 
     def listdir(self, key):
+        v3io_client = v3io.dataplane.Client(endpoint=self.url, access_key=self.token)
         container, subpath = split_path(self._join(key))
         if not subpath.endswith("/"):
             subpath += "/"
@@ -204,7 +180,7 @@ class V3ioStore(DataStore):
         subpath_length = len(subpath) - 1
 
         try:
-            response =
+            response = v3io_client.container.list(
                 container=container,
                 path=subpath,
                 get_all_attributes=False,
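Reviewer note: the rewritten V3ioStore now talks to the v3io web API through the shared HTTP helpers. Payloads up to ONE_MB go out as a single PUT; larger buffers are sent in chunks where every chunk after the first carries a "Range: -1" header, which the code relies on to mean append. A standalone sketch of that pattern (it assumes the http_put helper added to mlrun/datastore/base.py above and an object URL that honors Range-based appends):

from mlrun.datastore.base import http_put

ONE_MB = 1024 * 1024


def put_in_chunks(url, data, headers=None, max_chunk_size=ONE_MB):
    # The first chunk overwrites the object; later chunks append via "Range: -1".
    append_headers = dict(headers or {})
    append_headers["Range"] = "-1"
    offset = 0
    while offset < len(data):
        chunk = data[offset : offset + max_chunk_size]
        http_put(url, chunk, append_headers if offset else headers)
        offset += len(chunk)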
mlrun/db/base.py
CHANGED
@@ -677,21 +677,3 @@ class RunDBInterface(ABC):
         self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
     ):
         pass
-
-    def submit_workflow(
-        self,
-        project: str,
-        name: str,
-        workflow_spec: Union[
-            "mlrun.projects.pipelines.WorkflowSpec",
-            "mlrun.common.schemas.WorkflowSpec",
-            dict,
-        ],
-        arguments: Optional[dict] = None,
-        artifact_path: Optional[str] = None,
-        source: Optional[str] = None,
-        run_name: Optional[str] = None,
-        namespace: Optional[str] = None,
-        notifications: list["mlrun.model.Notification"] = None,
-    ) -> "mlrun.common.schemas.WorkflowResponse":
-        pass
|