PyPI - mlrun - Versions diffs - 1.6.2rc6__py3-none-any.whl → 1.6.3rc3__py3-none-any.whl - Mend

mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (57) hide show

mlrun/artifacts/model.py +28 -22
mlrun/common/db/sql_session.py +3 -0
mlrun/common/model_monitoring/helpers.py +4 -2
mlrun/common/schemas/__init__.py +2 -0
mlrun/common/schemas/common.py +40 -0
mlrun/common/schemas/model_monitoring/__init__.py +1 -0
mlrun/common/schemas/model_monitoring/constants.py +21 -5
mlrun/common/schemas/project.py +2 -0
mlrun/config.py +51 -20
mlrun/data_types/data_types.py +4 -0
mlrun/datastore/azure_blob.py +9 -9
mlrun/datastore/base.py +22 -44
mlrun/datastore/google_cloud_storage.py +6 -6
mlrun/datastore/v3io.py +70 -46
mlrun/db/base.py +18 -0
mlrun/db/httpdb.py +41 -36
mlrun/execution.py +3 -3
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
mlrun/frameworks/tf_keras/model_handler.py +7 -7
mlrun/k8s_utils.py +10 -5
mlrun/kfpops.py +19 -10
mlrun/model.py +6 -0
mlrun/model_monitoring/api.py +8 -8
mlrun/model_monitoring/batch.py +1 -1
mlrun/model_monitoring/controller.py +0 -7
mlrun/model_monitoring/features_drift_table.py +6 -0
mlrun/model_monitoring/helpers.py +4 -1
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
mlrun/model_monitoring/stream_processing.py +50 -36
mlrun/package/packagers/pandas_packagers.py +3 -3
mlrun/package/utils/_archiver.py +3 -1
mlrun/platforms/iguazio.py +6 -65
mlrun/projects/pipelines.py +29 -12
mlrun/projects/project.py +69 -55
mlrun/run.py +2 -0
mlrun/runtimes/base.py +24 -1
mlrun/runtimes/function.py +9 -9
mlrun/runtimes/kubejob.py +5 -3
mlrun/runtimes/local.py +2 -2
mlrun/runtimes/mpijob/abstract.py +6 -6
mlrun/runtimes/pod.py +3 -3
mlrun/runtimes/serving.py +3 -3
mlrun/runtimes/sparkjob/spark3job.py +3 -3
mlrun/serving/remote.py +4 -2
mlrun/utils/async_http.py +3 -3
mlrun/utils/helpers.py +20 -0
mlrun/utils/http.py +3 -3
mlrun/utils/logger.py +2 -2
mlrun/utils/notifications/notification_pusher.py +6 -6
mlrun/utils/version/version.json +2 -2
{mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/METADATA +15 -17
{mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/RECORD +57 -56
{mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/LICENSE +0 -0
{mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/WHEEL +0 -0
{mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/entry_points.txt +0 -0
{mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/top_level.txt +0 -0

mlrun/datastore/v3io.py CHANGED Viewed

@@ -15,12 +15,11 @@
 import mmap
 import os
 import time
-from copy import deepcopy
 from datetime import datetime
 import fsspec
-import requests
-import v3io.dataplane
+import v3io
+from v3io.dataplane.response import HttpResponseError
 import mlrun
 from mlrun.datastore.helpers import ONE_GB, ONE_MB
@@ -30,11 +29,6 @@ from .base import (
     DataStore,
     FileStats,
     basic_auth_header,
-    get_range,
-    http_get,
-    http_head,
-    http_put,
-    http_upload,
 )
 V3IO_LOCAL_ROOT = "v3io"
@@ -47,17 +41,18 @@ class V3ioStore(DataStore):
         self.headers = None
         self.secure = self.kind == "v3ios"
+        token = self._get_secret_or_env("V3IO_ACCESS_KEY")
+        username = self._get_secret_or_env("V3IO_USERNAME")
+        password = self._get_secret_or_env("V3IO_PASSWORD")
         if self.endpoint.startswith("https://"):
             self.endpoint = self.endpoint[len("https://") :]
             self.secure = True
         elif self.endpoint.startswith("http://"):
             self.endpoint = self.endpoint[len("http://") :]
             self.secure = False
-        token = self._get_secret_or_env("V3IO_ACCESS_KEY")
-        username = self._get_secret_or_env("V3IO_USERNAME")
-        password = self._get_secret_or_env("V3IO_PASSWORD")
+        self.client = v3io.dataplane.Client(access_key=token, endpoint=self.url)
+        self.object = self.client.object
         self.auth = None
         self.token = token
         if token:
@@ -65,6 +60,16 @@ class V3ioStore(DataStore):
         elif username and password:
             self.headers = basic_auth_header(username, password)
+    @staticmethod
+    def _do_object_request(function: callable, *args, **kwargs):
+        try:
+            return function(*args, **kwargs)
+        except HttpResponseError as http_response_error:
+            raise mlrun.errors.err_for_status_code(
+                status_code=http_response_error.status_code,
+                message=mlrun.errors.err_to_str(http_response_error),
+            )
     @staticmethod
     def uri_to_ipython(endpoint, subpath):
         return V3IO_LOCAL_ROOT + subpath
@@ -91,13 +96,19 @@ class V3ioStore(DataStore):
     def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
         """helper function for upload method, allows for controlling max_chunk_size in testing"""
+        container, path = split_path(self._join(key))
         file_size = os.path.getsize(src_path)  # in bytes
         if file_size <= ONE_MB:
-            http_upload(self.url + self._join(key), src_path, self.headers, None)
+            with open(src_path, "rb") as source_file:
+                data = source_file.read()
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data,
+                append=False,
+            )
             return
-        append_header = deepcopy(self.headers)
-        append_header["Range"] = "-1"
         # chunk must be a multiple of the ALLOCATIONGRANULARITY
         # https://docs.python.org/3/library/mmap.html
         if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
@@ -114,11 +125,13 @@ class V3ioStore(DataStore):
                     access=mmap.ACCESS_READ,
                     offset=file_offset,
                 ) as mmap_obj:
-                    http_put(
-                        self.url + self._join(key),
-                        mmap_obj,
-                        append_header if file_offset else self.headers,
-                        None,
+                    append = file_offset != 0
+                    self._do_object_request(
+                        self.object.put,
+                        container=container,
+                        path=path,
+                        body=mmap_obj,
+                        append=append,
                     )
                     file_offset += chunk_size
@@ -126,43 +139,55 @@ class V3ioStore(DataStore):
         return self._upload(key, src_path)
     def get(self, key, size=None, offset=0):
-        headers = self.headers
-        if size or offset:
-            headers = deepcopy(headers)
-            headers["Range"] = get_range(size, offset)
-        return http_get(self.url + self._join(key), headers)
+        container, path = split_path(self._join(key))
+        return self._do_object_request(
+            function=self.object.get,
+            container=container,
+            path=path,
+            offset=offset,
+            num_bytes=size,
+        ).body
-    def _put(self, key, data, max_chunk_size: int = ONE_GB):
+    def _put(self, key, data, append=False, max_chunk_size: int = ONE_GB):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
+        container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         if buffer_size <= ONE_MB:
-            http_put(self.url + self._join(key), data, self.headers, None)
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data,
+                append=append,
+            )
             return
-        append_header = deepcopy(self.headers)
-        append_header["Range"] = "-1"
         buffer_offset = 0
         try:
             data = memoryview(data)
         except TypeError:
             pass
-        with requests.Session() as requests_session:
-            while buffer_offset < buffer_size:
-                chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
-                http_put(
-                    self.url + self._join(key),
-                    data[buffer_offset : buffer_offset + chunk_size],
-                    append_header if buffer_offset else self.headers,
-                    None,
-                    requests_session,
-                )
-                buffer_offset += chunk_size
+        while buffer_offset < buffer_size:
+            chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
+            append = True if buffer_offset or append else False
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data[buffer_offset : buffer_offset + chunk_size],
+                append=append,
+            )
+            buffer_offset += chunk_size
     def put(self, key, data, append=False):
-        return self._put(key, data)
+        return self._put(key, data, append)
     def stat(self, key):
-        head = http_head(self.url + self._join(key), self.headers)
+        container, path = split_path(self._join(key))
+        response = self._do_object_request(
+            function=self.object.head, container=container, path=path
+        )
+        head = dict(response.headers)
         size = int(head.get("Content-Length", "0"))
         datestr = head.get("Last-Modified", "0")
         modified = time.mktime(
@@ -171,7 +196,6 @@ class V3ioStore(DataStore):
         return FileStats(size, modified)
     def listdir(self, key):
-        v3io_client = v3io.dataplane.Client(endpoint=self.url, access_key=self.token)
         container, subpath = split_path(self._join(key))
         if not subpath.endswith("/"):
             subpath += "/"
@@ -180,7 +204,7 @@ class V3ioStore(DataStore):
         subpath_length = len(subpath) - 1
         try:
-            response = v3io_client.container.list(
+            response = self.client.container.list(
                 container=container,
                 path=subpath,
                 get_all_attributes=False,

mlrun/db/base.py CHANGED Viewed

@@ -677,3 +677,21 @@ class RunDBInterface(ABC):
         self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
     ):
         pass
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass

mlrun/db/httpdb.py CHANGED Viewed

@@ -152,7 +152,7 @@ class HTTPRunDB(RunDBInterface):
     @staticmethod
     def get_api_path_prefix(version: str = None) -> str:
         """
-        :param version: API version to use, None (the default) will mean to use the default value from mlconf,
+        :param version: API version to use, None (the default) will mean to use the default value from mlrun.config,
          for un-versioned api set an empty string.
         """
         if version is not None:
@@ -250,7 +250,11 @@ class HTTPRunDB(RunDBInterface):
         try:
             response = self.session.request(
-                method, url, timeout=timeout, verify=False, **kw
+                method,
+                url,
+                timeout=timeout,
+                verify=config.httpdb.http.verify,
+                **kw,
             )
         except requests.RequestException as exc:
             error = f"{err_to_str(exc)}: {error}" if error else err_to_str(exc)
@@ -302,11 +306,11 @@ class HTTPRunDB(RunDBInterface):
     def connect(self, secrets=None):
         """Connect to the MLRun API server. Must be called prior to executing any other method.
-        The code utilizes the URL for the API server from the configuration - ``mlconf.dbpath``.
+        The code utilizes the URL for the API server from the configuration - ``config.dbpath``.
         For example::
-            mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'
+            config.dbpath = config.dbpath or 'http://mlrun-api:8080'
             db = get_run_db().connect()
         """
         # hack to allow unit tests to instantiate HTTPRunDB without a real server behind
@@ -500,7 +504,7 @@ class HTTPRunDB(RunDBInterface):
         if offset < 0:
             raise MLRunInvalidArgumentError("Offset cannot be negative")
         if size is None:
-            size = int(mlrun.mlconf.httpdb.logs.pull_logs_default_size_limit)
+            size = int(config.httpdb.logs.pull_logs_default_size_limit)
         elif size == -1:
             logger.warning(
                 "Retrieving all logs. This may be inefficient and can result in a large log."
@@ -546,25 +550,23 @@ class HTTPRunDB(RunDBInterface):
         state, text = self.get_log(uid, project, offset=offset)
         if text:
-            print(text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors))
+            print(text.decode(errors=config.httpdb.logs.decode.errors))
         nil_resp = 0
         while True:
             offset += len(text)
             # if we get 3 nil responses in a row, increase the sleep time to 10 seconds
             # TODO: refactor this to use a conditional backoff mechanism
             if nil_resp < 3:
-                time.sleep(int(mlrun.mlconf.httpdb.logs.pull_logs_default_interval))
+                time.sleep(int(config.httpdb.logs.pull_logs_default_interval))
             else:
                 time.sleep(
-                    int(
-                        mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
-                    )
+                    int(config.httpdb.logs.pull_logs_backoff_no_logs_default_interval)
                 )
             state, text = self.get_log(uid, project, offset=offset)
             if text:
                 nil_resp = 0
                 print(
-                    text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors),
+                    text.decode(errors=config.httpdb.logs.decode.errors),
                     end="",
                 )
             else:
@@ -1135,17 +1137,17 @@ class HTTPRunDB(RunDBInterface):
             structured_dict = {}
             for project, job_runtime_resources_map in response.json().items():
                 for job_id, runtime_resources in job_runtime_resources_map.items():
-                    structured_dict.setdefault(project, {})[
-                        job_id
-                    ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    structured_dict.setdefault(project, {})[job_id] = (
+                        mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    )
             return structured_dict
         elif group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
             structured_dict = {}
             for project, kind_runtime_resources_map in response.json().items():
                 for kind, runtime_resources in kind_runtime_resources_map.items():
-                    structured_dict.setdefault(project, {})[
-                        kind
-                    ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    structured_dict.setdefault(project, {})[kind] = (
+                        mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    )
             return structured_dict
         else:
             raise NotImplementedError(
@@ -1173,7 +1175,8 @@ class HTTPRunDB(RunDBInterface):
         :param force: Force deletion - delete the runtime resource even if it's not in terminal state or if the grace
             period didn't pass.
         :param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
-            the moment they moved to terminal state (defaults to mlrun.mlconf.runtime_resources_deletion_grace_period).
+            the moment they moved to terminal state
+            (defaults to mlrun.config.config.runtime_resources_deletion_grace_period).
         :returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
             that were removed.
@@ -1203,9 +1206,9 @@ class HTTPRunDB(RunDBInterface):
         structured_dict = {}
         for project, kind_runtime_resources_map in response.json().items():
             for kind, runtime_resources in kind_runtime_resources_map.items():
-                structured_dict.setdefault(project, {})[
-                    kind
-                ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                structured_dict.setdefault(project, {})[kind] = (
+                    mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                )
         return structured_dict
     def create_schedule(
@@ -1340,7 +1343,7 @@ class HTTPRunDB(RunDBInterface):
             logger.warning(
                 "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
                 "keys. Only the permissions granted to the platform's configured secret will take affect "
-                "(see mlrun.mlconf.httpdb.builder.docker_registry_secret). "
+                "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
                 "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
                 source=func.spec.build.source,
                 load_source_on_run=func.spec.build.load_source_on_run,
@@ -1495,7 +1498,7 @@ class HTTPRunDB(RunDBInterface):
         Retrieve updated information on project background tasks being executed.
         If no filter is provided, will return background tasks from the last week.
-        :param project: Project name (defaults to mlrun.mlconf.default_project).
+        :param project: Project name (defaults to mlrun.config.config.default_project).
         :param state:   List only background tasks whose state is specified.
         :param created_from: Filter by background task created time in ``[created_from, created_to]``.
         :param created_to:  Filter by background task created time in ``[created_from, created_to]``.
@@ -1608,19 +1611,21 @@ class HTTPRunDB(RunDBInterface):
         artifact_path=None,
         ops=None,
         cleanup_ttl=None,
+        timeout=60,
     ):
         """Submit a KFP pipeline for execution.
-        :param project: The project of the pipeline
-        :param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
-        :param arguments: A dictionary of arguments to pass to the pipeline.
-        :param experiment: A name to assign for the specific experiment.
-        :param run: A name for this specific run.
-        :param namespace: Kubernetes namespace to execute the pipeline in.
-        :param artifact_path: A path to artifacts used by this pipeline.
-        :param ops: Transformers to apply on all ops in the pipeline.
-        :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
-                            workflow and all its resources are deleted)
+        :param project:         The project of the pipeline
+        :param pipeline:        Pipeline function or path to .yaml/.zip pipeline file.
+        :param arguments:       A dictionary of arguments to pass to the pipeline.
+        :param experiment:      A name to assign for the specific experiment.
+        :param run:             A name for this specific run.
+        :param namespace:       Kubernetes namespace to execute the pipeline in.
+        :param artifact_path:   A path to artifacts used by this pipeline.
+        :param ops:             Transformers to apply on all ops in the pipeline.
+        :param cleanup_ttl:     Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
+                                workflow and all its resources are deleted)
+        :param timeout:         Timeout for the API call.
         """
         if isinstance(pipeline, str):
@@ -1662,7 +1667,7 @@ class HTTPRunDB(RunDBInterface):
                 "POST",
                 f"projects/{project}/pipelines",
                 params=params,
-                timeout=20,
+                timeout=timeout,
                 body=data,
                 headers=headers,
             )
@@ -3450,8 +3455,8 @@ class HTTPRunDB(RunDBInterface):
         source: Optional[str] = None,
         run_name: Optional[str] = None,
         namespace: Optional[str] = None,
-        notifications: typing.List[mlrun.model.Notification] = None,
-    ):
+        notifications: list[mlrun.model.Notification] = None,
+    ) -> mlrun.common.schemas.WorkflowResponse:
         """
         Submitting workflow for a remote execution.

mlrun/execution.py CHANGED Viewed

@@ -559,9 +559,9 @@ class MLClientCtx(object):
             for k, v in get_in(task, ["status", "results"], {}).items():
                 self._results[k] = v
             for artifact in get_in(task, ["status", run_keys.artifacts], []):
-                self._artifacts_manager.artifacts[
-                    artifact["metadata"]["key"]
-                ] = artifact
+                self._artifacts_manager.artifacts[artifact["metadata"]["key"]] = (
+                    artifact
+                )
                 self._artifacts_manager.link_artifact(
                     self.project,
                     self.name,

mlrun/frameworks/tf_keras/callbacks/logging_callback.py CHANGED Viewed

@@ -389,9 +389,9 @@ class LoggingCallback(Callback):
         ):
             try:
                 self._get_hyperparameter(key_chain=learning_rate_key_chain)
-                self._dynamic_hyperparameters_keys[
-                    learning_rate_key
-                ] = learning_rate_key_chain
+                self._dynamic_hyperparameters_keys[learning_rate_key] = (
+                    learning_rate_key_chain
+                )
             except (KeyError, IndexError, ValueError):
                 pass

mlrun/frameworks/tf_keras/model_handler.py CHANGED Viewed

@@ -263,13 +263,13 @@ class TFKerasModelHandler(DLModelHandler):
         # Update the paths and log artifacts if context is available:
         if self._weights_file is not None:
             if self._context is not None:
-                artifacts[
-                    self._get_weights_file_artifact_name()
-                ] = self._context.log_artifact(
-                    self._weights_file,
-                    local_path=self._weights_file,
-                    artifact_path=output_path,
-                    db_key=False,
+                artifacts[self._get_weights_file_artifact_name()] = (
+                    self._context.log_artifact(
+                        self._weights_file,
+                        local_path=self._weights_file,
+                        artifact_path=output_path,
+                        db_key=False,
+                    )
                 )
         return artifacts if self._context is not None else None

mlrun/k8s_utils.py CHANGED Viewed

@@ -134,13 +134,13 @@ def sanitize_label_value(value: str) -> str:
     return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])
-def verify_label_key(key):
+def verify_label_key(key: str):
+    """
+    Verify that the label key is valid for Kubernetes.
+    Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
+    """
     if not key:
         raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
-    if key.startswith("k8s.io") or key.startswith("kubernetes.io"):
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "Labels cannot start with 'k8s.io' or 'kubernetes.io'"
-        )
     mlrun.utils.helpers.verify_field_regex(
         f"project.metadata.labels.'{key}'",
@@ -148,6 +148,11 @@ def verify_label_key(key):
         mlrun.utils.regex.k8s_character_limit,
     )
+    if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
+        )
     parts = key.split("/")
     if len(parts) == 1:
         name = parts[0]

mlrun/kfpops.py CHANGED Viewed

@@ -41,8 +41,8 @@ from .utils import (
 # default KFP artifacts and output (ui metadata, metrics etc.)
 # directories to /tmp to allow running with security context
-KFPMETA_DIR = os.environ.get("KFPMETA_OUT_DIR", "/tmp")
-KFP_ARTIFACTS_DIR = os.environ.get("KFP_ARTIFACTS_DIR", "/tmp")
+KFPMETA_DIR = "/tmp"
+KFP_ARTIFACTS_DIR = "/tmp"
 project_annotation = "mlrun/project"
 run_annotation = "mlrun/pipeline-step-type"
@@ -71,7 +71,7 @@ def write_kfpmeta(struct):
             {"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
         ],
     }
-    with open(KFPMETA_DIR + "/mlpipeline-metrics.json", "w") as f:
+    with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
         json.dump(metrics, f)
     struct = deepcopy(struct)
@@ -91,7 +91,14 @@ def write_kfpmeta(struct):
         elif key in results:
             val = results[key]
         try:
-            path = "/".join([KFP_ARTIFACTS_DIR, key])
+            # NOTE: if key has "../x", it would fail on path traversal
+            path = os.path.join(KFP_ARTIFACTS_DIR, key)
+            if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
+                logger.warning(
+                    "Path traversal is not allowed ignoring", path=path, key=key
+                )
+                continue
+            path = os.path.abspath(path)
             logger.info("Writing artifact output", path=path, val=val)
             with open(path, "w") as fp:
                 fp.write(str(val))
@@ -109,7 +116,7 @@ def write_kfpmeta(struct):
         "outputs": output_artifacts
         + [{"type": "markdown", "storage": "inline", "source": text}]
     }
-    with open(KFPMETA_DIR + "/mlpipeline-ui-metadata.json", "w") as f:
+    with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
         json.dump(metadata, f)
@@ -401,9 +408,9 @@ def mlrun_op(
         cmd += ["--label", f"{label}={val}"]
     for output in outputs:
         cmd += ["-o", str(output)]
-        file_outputs[
-            output.replace(".", "_")
-        ] = f"/tmp/{output}"  # not using path.join to avoid windows "\"
+        file_outputs[output.replace(".", "_")] = (
+            f"/tmp/{output}"  # not using path.join to avoid windows "\"
+        )
     if project:
         cmd += ["--project", project]
     if handler:
@@ -450,8 +457,10 @@ def mlrun_op(
         command=cmd + [command],
         file_outputs=file_outputs,
         output_artifact_paths={
-            "mlpipeline-ui-metadata": KFPMETA_DIR + "/mlpipeline-ui-metadata.json",
-            "mlpipeline-metrics": KFPMETA_DIR + "/mlpipeline-metrics.json",
+            "mlpipeline-ui-metadata": os.path.join(
+                KFPMETA_DIR, "mlpipeline-ui-metadata.json"
+            ),
+            "mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
         },
     )
     cop = add_default_function_resources(cop)

mlrun/model.py CHANGED Viewed

@@ -62,6 +62,7 @@ class ModelObj:
             return new_type.from_dict(param)
         return param
+    @mlrun.utils.filter_warnings("ignore", FutureWarning)
     def to_dict(self, fields=None, exclude=None):
         """convert the object to a python dictionary
@@ -359,6 +360,7 @@ class ImageBuilder(ModelObj):
         requirements: list = None,
         extra_args=None,
         builder_env=None,
+        source_code_target_dir=None,
     ):
         self.functionSourceCode = functionSourceCode  #: functionSourceCode
         self.codeEntryType = ""  #: codeEntryType
@@ -379,6 +381,7 @@ class ImageBuilder(ModelObj):
         self.auto_build = auto_build  #: auto_build
         self.build_pod = None
         self.requirements = requirements or []  #: pip requirements
+        self.source_code_target_dir = source_code_target_dir or None
     @property
     def source(self):
@@ -415,6 +418,7 @@ class ImageBuilder(ModelObj):
         overwrite=False,
         builder_env=None,
         extra_args=None,
+        source_code_target_dir=None,
     ):
         if image:
             self.image = image
@@ -440,6 +444,8 @@ class ImageBuilder(ModelObj):
             self.builder_env = builder_env
         if extra_args:
             self.extra_args = extra_args
+        if source_code_target_dir:
+            self.source_code_target_dir = source_code_target_dir
     def with_commands(
         self,

mlrun/model_monitoring/api.py CHANGED Viewed

@@ -436,9 +436,9 @@ def _generate_model_endpoint(
         ] = possible_drift_threshold
     model_endpoint.spec.monitoring_mode = monitoring_mode
-    model_endpoint.status.first_request = (
-        model_endpoint.status.last_request
-    ) = datetime_now().isoformat()
+    model_endpoint.status.first_request = model_endpoint.status.last_request = (
+        datetime_now().isoformat()
+    )
     if sample_set_statistics:
         model_endpoint.status.feature_stats = sample_set_statistics
@@ -476,11 +476,11 @@ def trigger_drift_batch_job(
         db_session = mlrun.get_run_db()
     # Register the monitoring batch job (do nothing if already exist) and get the job function as a dictionary
-    batch_function_dict: typing.Dict[
-        str, typing.Any
-    ] = db_session.deploy_monitoring_batch_job(
-        project=project,
-        default_batch_image=default_batch_image,
+    batch_function_dict: typing.Dict[str, typing.Any] = (
+        db_session.deploy_monitoring_batch_job(
+            project=project,
+            default_batch_image=default_batch_image,
+        )
     )
     # Prepare current run params

mlrun/model_monitoring/batch.py CHANGED Viewed

@@ -992,7 +992,7 @@ class BatchProcessor:
         """
         stream_http_path = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                project=self.project
+                project=self.project, namespace=mlrun.mlconf.namespace
             )
         )

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -426,13 +426,6 @@ class MonitoringApplicationController:
             m_fs = fstore.get_feature_set(
                 endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
             )
-            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
-            if labels:
-                if isinstance(labels, str):
-                    labels = json.loads(labels)
-                for label in labels:
-                    if label not in list(m_fs.spec.features.keys()):
-                        m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(

mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3rc3__py3-none-any.whl

Potentially problematic release.

mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3rc3__py3-none-any.whl