mlrun 1.7.0rc7__py3-none-any.whl → 1.7.0rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (91)
  1. mlrun/__init__.py +1 -0
  2. mlrun/__main__.py +2 -0
  3. mlrun/artifacts/model.py +29 -25
  4. mlrun/common/schemas/__init__.py +4 -0
  5. mlrun/common/schemas/alert.py +122 -0
  6. mlrun/common/schemas/api_gateway.py +8 -1
  7. mlrun/common/schemas/auth.py +4 -0
  8. mlrun/common/schemas/client_spec.py +1 -0
  9. mlrun/common/schemas/hub.py +7 -9
  10. mlrun/common/schemas/model_monitoring/constants.py +4 -2
  11. mlrun/{datastore/helpers.py → common/schemas/pagination.py} +11 -3
  12. mlrun/common/schemas/project.py +15 -10
  13. mlrun/config.py +35 -13
  14. mlrun/datastore/__init__.py +3 -7
  15. mlrun/datastore/base.py +6 -5
  16. mlrun/datastore/datastore_profile.py +19 -1
  17. mlrun/datastore/snowflake_utils.py +43 -0
  18. mlrun/datastore/sources.py +18 -30
  19. mlrun/datastore/targets.py +140 -12
  20. mlrun/datastore/utils.py +10 -5
  21. mlrun/datastore/v3io.py +27 -50
  22. mlrun/db/base.py +88 -2
  23. mlrun/db/httpdb.py +314 -41
  24. mlrun/db/nopdb.py +142 -0
  25. mlrun/execution.py +21 -14
  26. mlrun/feature_store/api.py +9 -5
  27. mlrun/feature_store/feature_set.py +39 -23
  28. mlrun/feature_store/feature_vector.py +2 -1
  29. mlrun/feature_store/retrieval/spark_merger.py +27 -23
  30. mlrun/feature_store/steps.py +30 -19
  31. mlrun/features.py +4 -13
  32. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  33. mlrun/frameworks/lgbm/__init__.py +1 -1
  34. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  35. mlrun/frameworks/lgbm/model_handler.py +1 -1
  36. mlrun/frameworks/pytorch/__init__.py +2 -2
  37. mlrun/frameworks/sklearn/__init__.py +1 -1
  38. mlrun/frameworks/tf_keras/__init__.py +1 -1
  39. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  40. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  41. mlrun/frameworks/xgboost/__init__.py +1 -1
  42. mlrun/kfpops.py +2 -5
  43. mlrun/launcher/base.py +1 -1
  44. mlrun/launcher/client.py +2 -2
  45. mlrun/model.py +2 -2
  46. mlrun/model_monitoring/application.py +11 -2
  47. mlrun/model_monitoring/applications/histogram_data_drift.py +3 -3
  48. mlrun/model_monitoring/controller.py +2 -3
  49. mlrun/model_monitoring/helpers.py +3 -1
  50. mlrun/model_monitoring/stream_processing.py +0 -1
  51. mlrun/model_monitoring/writer.py +32 -0
  52. mlrun/package/packagers_manager.py +1 -0
  53. mlrun/platforms/__init__.py +1 -1
  54. mlrun/platforms/other.py +1 -1
  55. mlrun/projects/operations.py +11 -4
  56. mlrun/projects/pipelines.py +1 -1
  57. mlrun/projects/project.py +180 -73
  58. mlrun/run.py +77 -41
  59. mlrun/runtimes/__init__.py +16 -0
  60. mlrun/runtimes/base.py +4 -1
  61. mlrun/runtimes/kubejob.py +26 -121
  62. mlrun/runtimes/mpijob/abstract.py +8 -8
  63. mlrun/runtimes/nuclio/api_gateway.py +58 -8
  64. mlrun/runtimes/nuclio/application/application.py +79 -1
  65. mlrun/runtimes/nuclio/application/reverse_proxy.go +9 -1
  66. mlrun/runtimes/nuclio/function.py +20 -13
  67. mlrun/runtimes/nuclio/serving.py +11 -10
  68. mlrun/runtimes/pod.py +148 -3
  69. mlrun/runtimes/utils.py +0 -28
  70. mlrun/secrets.py +6 -2
  71. mlrun/serving/remote.py +2 -3
  72. mlrun/serving/routers.py +7 -4
  73. mlrun/serving/server.py +1 -1
  74. mlrun/serving/states.py +14 -38
  75. mlrun/serving/v2_serving.py +8 -7
  76. mlrun/utils/helpers.py +1 -1
  77. mlrun/utils/http.py +1 -1
  78. mlrun/utils/notifications/notification/base.py +12 -0
  79. mlrun/utils/notifications/notification/console.py +2 -0
  80. mlrun/utils/notifications/notification/git.py +3 -1
  81. mlrun/utils/notifications/notification/ipython.py +2 -0
  82. mlrun/utils/notifications/notification/slack.py +41 -13
  83. mlrun/utils/notifications/notification/webhook.py +11 -1
  84. mlrun/utils/retryer.py +3 -2
  85. mlrun/utils/version/version.json +2 -2
  86. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/METADATA +15 -15
  87. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/RECORD +91 -89
  88. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/LICENSE +0 -0
  89. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/WHEEL +0 -0
  90. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/entry_points.txt +0 -0
  91. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc11.dist-info}/top_level.txt +0 -0
mlrun/datastore/snowflake_utils.py ADDED
@@ -0,0 +1,43 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ import mlrun
+
+
+ def get_snowflake_password():
+     key = "SNOWFLAKE_PASSWORD"
+     snowflake_password = mlrun.get_secret_or_env(key)
+
+     if not snowflake_password:
+         raise mlrun.errors.MLRunInvalidArgumentError(
+             f"No password provided. Set password using the {key} "
+             "project secret or environment variable."
+         )
+
+     return snowflake_password
+
+
+ def get_snowflake_spark_options(attributes):
+     return {
+         "format": "net.snowflake.spark.snowflake",
+         "sfURL": attributes.get("url"),
+         "sfUser": attributes.get("user"),
+         "sfPassword": get_snowflake_password(),
+         "sfDatabase": attributes.get("database"),
+         "sfSchema": attributes.get("schema"),
+         "sfWarehouse": attributes.get("warehouse"),
+         "application": "iguazio_platform",
+         "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
+     }
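These helpers centralize the Snowflake credential lookup and Spark connector options that were previously inlined in SnowflakeSource (see the sources.py hunks below). A minimal usage sketch; the connection values are placeholders, and a SparkSession with the Snowflake Spark connector on its classpath is assumed:

from pyspark.sql import SparkSession

from mlrun.datastore.snowflake_utils import get_snowflake_spark_options

# assumes SNOWFLAKE_PASSWORD is set as a project secret or environment variable
attributes = {
    "url": "<account>.<region>.snowflakecomputing.com",  # placeholder
    "user": "analyst",  # placeholder
    "database": "ANALYTICS",
    "schema": "PUBLIC",
    "warehouse": "COMPUTE_WH",
}
options = get_snowflake_spark_options(attributes)
spark_format = options.pop("format")  # "net.snowflake.spark.snowflake"

spark = SparkSession.builder.getOrCreate()
df = (
    spark.read.format(spark_format)
    .options(**options)
    .option("query", "select current_date")
    .load()
)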
mlrun/datastore/sources.py CHANGED
@@ -28,6 +28,7 @@ from nuclio.config import split_path
 
  import mlrun
  from mlrun.config import config
+ from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
  from mlrun.secrets import SecretsStore
 
  from ..model import DataSource
@@ -113,7 +114,11 @@ class BaseSourceDriver(DataSource):
 
      def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
          if self.support_spark:
-             df = load_spark_dataframe_with_options(session, self.get_spark_options())
+             spark_options = self.get_spark_options()
+             spark_format = spark_options.pop("format", None)
+             df = load_spark_dataframe_with_options(
+                 session, spark_options, format=spark_format
+             )
              if named_view:
                  df.createOrReplaceTempView(self.name)
              return self._filter_spark_df(df, time_field, columns)
@@ -401,12 +406,17 @@ class BigQuerySource(BaseSourceDriver):
 
          # use sql query
          query_string = "SELECT * FROM `the-psf.pypi.downloads20210328` LIMIT 5000"
-         source = BigQuerySource("bq1", query=query_string,
-                                 gcp_project="my_project",
-                                 materialization_dataset="dataviews")
+         source = BigQuerySource(
+             "bq1",
+             query=query_string,
+             gcp_project="my_project",
+             materialization_dataset="dataviews",
+         )
 
          # read a table
-         source = BigQuerySource("bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project")
+         source = BigQuerySource(
+             "bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project"
+         )
 
 
      :parameter name: source name
@@ -673,32 +683,10 @@ class SnowflakeSource(BaseSourceDriver):
              **kwargs,
          )
 
-     def _get_password(self):
-         key = "SNOWFLAKE_PASSWORD"
-         snowflake_password = os.getenv(key) or os.getenv(
-             SecretsStore.k8s_env_variable_name_for_secret(key)
-         )
-
-         if not snowflake_password:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 "No password provided. Set password using the SNOWFLAKE_PASSWORD "
-                 "project secret or environment variable."
-             )
-
-         return snowflake_password
-
      def get_spark_options(self):
-         return {
-             "format": "net.snowflake.spark.snowflake",
-             "query": self.attributes.get("query"),
-             "sfURL": self.attributes.get("url"),
-             "sfUser": self.attributes.get("user"),
-             "sfPassword": self._get_password(),
-             "sfDatabase": self.attributes.get("database"),
-             "sfSchema": self.attributes.get("schema"),
-             "sfWarehouse": self.attributes.get("warehouse"),
-             "application": "iguazio_platform",
-         }
+         spark_options = get_snowflake_spark_options(self.attributes)
+         spark_options["query"] = self.attributes.get("query")
+         return spark_options
 
 
  class CustomSource(BaseSourceDriver):
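The to_spark_df change pops the "format" key out of the source's Spark options and passes it to load_spark_dataframe_with_options separately, so connector formats such as Snowflake's are applied explicitly. That helper is not part of this diff; the following is only a rough sketch of what the call plausibly resolves to on the Spark side, not the actual mlrun implementation:

from pyspark.sql import DataFrame, SparkSession


def load_spark_dataframe_with_options(
    session: SparkSession, spark_options: dict, format: str = None
) -> DataFrame:
    # sketch only: use the explicit source format when one was supplied,
    # otherwise fall back to the session's default data source
    reader = session.read
    if format:
        reader = reader.format(format)
    return reader.load(**spark_options)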
mlrun/datastore/targets.py CHANGED
@@ -17,6 +17,7 @@ import os
  import random
  import sys
  import time
+ import warnings
  from collections import Counter
  from copy import copy
  from typing import Any, Optional, Union
@@ -28,6 +29,7 @@ from mergedeep import merge
  import mlrun
  import mlrun.utils.helpers
  from mlrun.config import config
+ from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
  from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
  from mlrun.utils import logger, now_date
  from mlrun.utils.helpers import to_parquet
@@ -57,6 +59,7 @@ class TargetTypes:
      dataframe = "dataframe"
      custom = "custom"
      sql = "sql"
+     snowflake = "snowflake"
 
      @staticmethod
      def all():
@@ -71,6 +74,7 @@ class TargetTypes:
              TargetTypes.dataframe,
              TargetTypes.custom,
              TargetTypes.sql,
+             TargetTypes.snowflake,
          ]
 
 
@@ -78,11 +82,14 @@ def generate_target_run_id():
      return f"{round(time.time() * 1000)}_{random.randint(0, 999)}"
 
 
- def write_spark_dataframe_with_options(spark_options, df, mode):
+ def write_spark_dataframe_with_options(spark_options, df, mode, write_format=None):
      non_hadoop_spark_options = spark_session_update_hadoop_options(
          df.sql_ctx.sparkSession, spark_options
      )
-     df.write.mode(mode).save(**non_hadoop_spark_options)
+     if write_format:
+         df.write.format(write_format).mode(mode).save(**non_hadoop_spark_options)
+     else:
+         df.write.mode(mode).save(**non_hadoop_spark_options)
 
 
  def default_target_names():
@@ -497,7 +504,10 @@ class BaseStoreTarget(DataTargetBase):
              options = self.get_spark_options(key_column, timestamp_key)
              options.update(kwargs)
              df = self.prepare_spark_df(df, key_column, timestamp_key, options)
-             write_spark_dataframe_with_options(options, df, "overwrite")
+             write_format = options.pop("format", None)
+             write_spark_dataframe_with_options(
+                 options, df, "overwrite", write_format=write_format
+             )
          elif hasattr(df, "dask"):
              dask_options = self.get_dask_options()
              store, path_in_store, target_path = self._get_store_and_path()
@@ -524,7 +534,12 @@
          store, path_in_store, target_path = self._get_store_and_path()
          target_path = generate_path_with_chunk(self, chunk_id, target_path)
          file_system = store.filesystem
-         if file_system.protocol == "file":
+         if (
+             file_system.protocol == "file"
+             # fsspec 2023.10.0 changed protocol from "file" to ("file", "local")
+             or isinstance(file_system.protocol, (tuple, list))
+             and "file" in file_system.protocol
+         ):
              dir = os.path.dirname(target_path)
              if dir:
                  os.makedirs(dir, exist_ok=True)
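The widened protocol check accounts for newer fsspec releases where the local filesystem reports a tuple protocol, as the inline comment notes. A quick illustration of that behavior (output depends on the installed fsspec version):

import fsspec

fs = fsspec.filesystem("file")
print(fs.protocol)  # "file" on older fsspec, ("file", "local") on fsspec >= 2023.10.0

is_local = fs.protocol == "file" or (
    isinstance(fs.protocol, (tuple, list)) and "file" in fs.protocol
)
print(is_local)  # True for the local filesystem either way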
@@ -1108,6 +1123,97 @@ class CSVTarget(BaseStoreTarget):
          return True
 
 
+ class SnowflakeTarget(BaseStoreTarget):
+     """
+     :param attributes: A dictionary of attributes for Snowflake connection; will be overridden by database parameters
+         if they exist.
+     :param url: Snowflake hostname, in the format: <account_name>.<region>.snowflakecomputing.com
+     :param user: Snowflake user for login
+     :param db_schema: Database schema
+     :param database: Database name
+     :param warehouse: Snowflake warehouse name
+     :param table_name: Snowflake table name
+     """
+
+     support_spark = True
+     support_append = True
+     is_offline = True
+     kind = TargetTypes.snowflake
+
+     def __init__(
+         self,
+         name: str = "",
+         path=None,
+         attributes: dict[str, str] = None,
+         after_step=None,
+         columns=None,
+         partitioned: bool = False,
+         key_bucketing_number: Optional[int] = None,
+         partition_cols: Optional[list[str]] = None,
+         time_partitioning_granularity: Optional[str] = None,
+         max_events: Optional[int] = None,
+         flush_after_seconds: Optional[int] = None,
+         storage_options: dict[str, str] = None,
+         schema: dict[str, Any] = None,
+         credentials_prefix=None,
+         url: str = None,
+         user: str = None,
+         db_schema: str = None,
+         database: str = None,
+         warehouse: str = None,
+         table_name: str = None,
+     ):
+         attrs = {
+             "url": url,
+             "user": user,
+             "database": database,
+             "schema": db_schema,
+             "warehouse": warehouse,
+             "table": table_name,
+         }
+         extended_attrs = {
+             key: value for key, value in attrs.items() if value is not None
+         }
+         attributes = {} if not attributes else attributes
+         attributes.update(extended_attrs)
+         super().__init__(
+             name,
+             path,
+             attributes,
+             after_step,
+             list(schema.keys()) if schema else columns,
+             partitioned,
+             key_bucketing_number,
+             partition_cols,
+             time_partitioning_granularity,
+             max_events=max_events,
+             flush_after_seconds=flush_after_seconds,
+             storage_options=storage_options,
+             schema=schema,
+             credentials_prefix=credentials_prefix,
+         )
+
+     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
+         spark_options = get_snowflake_spark_options(self.attributes)
+         spark_options["dbtable"] = self.attributes.get("table")
+         return spark_options
+
+     def purge(self):
+         pass
+
+     def as_df(
+         self,
+         columns=None,
+         df_module=None,
+         entities=None,
+         start_time=None,
+         end_time=None,
+         time_column=None,
+         **kwargs,
+     ):
+         raise NotImplementedError()
+
+
  class NoSqlBaseTarget(BaseStoreTarget):
      is_table = True
      is_online = True
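A minimal sketch of constructing the new SnowflakeTarget; all connection values are placeholders, and SNOWFLAKE_PASSWORD is assumed to be available as a project secret or environment variable when the Spark options are resolved:

from mlrun.datastore.targets import SnowflakeTarget

target = SnowflakeTarget(
    name="snowflake",
    url="<account>.<region>.snowflakecomputing.com",  # placeholder
    user="analyst",  # placeholder
    database="ANALYTICS",
    db_schema="PUBLIC",
    warehouse="COMPUTE_WH",
    table_name="FEATURES",
)

# resolves the shared connector options plus the destination table;
# the Spark writer then pops "format" and calls df.write.format(...).save(...)
print(target.get_spark_options())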
@@ -1179,7 +1285,10 @@ class NoSqlBaseTarget(BaseStoreTarget):
              options = self.get_spark_options(key_column, timestamp_key)
              options.update(kwargs)
              df = self.prepare_spark_df(df)
-             write_spark_dataframe_with_options(options, df, "overwrite")
+             write_format = options.pop("format", None)
+             write_spark_dataframe_with_options(
+                 options, df, "overwrite", write_format=write_format
+             )
          else:
              # To prevent modification of the original dataframe and make sure
              # that the last event of a key is the one being persisted
@@ -1419,11 +1528,27 @@ class KafkaTarget(BaseStoreTarget):
          *args,
          bootstrap_servers=None,
          producer_options=None,
+         brokers=None,
          **kwargs,
      ):
          attrs = {}
-         if bootstrap_servers is not None:
-             attrs["bootstrap_servers"] = bootstrap_servers
+
+         # TODO: Remove this in 1.9.0
+         if bootstrap_servers:
+             if brokers:
+                 raise mlrun.errors.MLRunInvalidArgumentError(
+                     "KafkaTarget cannot be created with both the 'brokers' parameter and the deprecated "
+                     "'bootstrap_servers' parameter. Please use 'brokers' only."
+                 )
+             warnings.warn(
+                 "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
+                 "use 'brokers' instead.",
+                 FutureWarning,
+             )
+             brokers = bootstrap_servers
+
+         if brokers:
+             attrs["brokers"] = brokers
          if producer_options is not None:
              attrs["producer_options"] = producer_options
 
@@ -1445,14 +1570,16 @@ class KafkaTarget(BaseStoreTarget):
          if self.path and self.path.startswith("ds://"):
              datastore_profile = datastore_profile_read(self.path)
              attributes = datastore_profile.attributes()
-             bootstrap_servers = attributes.pop("bootstrap_servers", None)
+             brokers = attributes.pop(
+                 "brokers", attributes.pop("bootstrap_servers", None)
+             )
              topic = datastore_profile.topic
          else:
              attributes = copy(self.attributes)
-             bootstrap_servers = attributes.pop("bootstrap_servers", None)
-             topic, bootstrap_servers = parse_kafka_url(
-                 self.get_target_path(), bootstrap_servers
+             brokers = attributes.pop(
+                 "brokers", attributes.pop("bootstrap_servers", None)
              )
+             topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
 
          if not topic:
              raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1466,7 +1593,7 @@
              class_name="storey.KafkaTarget",
              columns=column_list,
              topic=topic,
-             bootstrap_servers=bootstrap_servers,
+             brokers=brokers,
              **attributes,
          )
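For callers of KafkaTarget, the rename amounts to swapping a keyword. A hedged before/after sketch; the name, path, topic, and broker addresses are placeholders, and keyword pass-through to the base target is assumed:

from mlrun.datastore.targets import KafkaTarget

# deprecated since 1.7.0: still accepted, but emits a FutureWarning
old_style = KafkaTarget(
    name="kafka-target",
    path="kafka://broker-1:9092/my-topic",
    bootstrap_servers=["broker-2:9092"],
)

# preferred form going forward
new_style = KafkaTarget(
    name="kafka-target",
    path="kafka://broker-1:9092/my-topic",
    brokers=["broker-2:9092"],
)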
 
@@ -1957,6 +2084,7 @@ kind_to_driver = {
      TargetTypes.tsdb: TSDBTarget,
      TargetTypes.custom: CustomTarget,
      TargetTypes.sql: SQLTarget,
+     TargetTypes.snowflake: SnowflakeTarget,
  }
 
 
mlrun/datastore/utils.py CHANGED
@@ -23,24 +23,29 @@ import semver
  import mlrun.datastore
 
 
- def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list]:
+ def parse_kafka_url(
+     url: str, brokers: typing.Union[list, str] = None
+ ) -> tuple[str, list]:
      """Generating Kafka topic and adjusting a list of bootstrap servers.
 
      :param url: URL path to parse using urllib.parse.urlparse.
-     :param bootstrap_servers: List of bootstrap servers for the kafka brokers.
+     :param brokers: List of kafka brokers.
 
      :return: A tuple of:
          [0] = Kafka topic value
          [1] = List of bootstrap servers
      """
-     bootstrap_servers = bootstrap_servers or []
+     brokers = brokers or []
+
+     if isinstance(brokers, str):
+         brokers = brokers.split(",")
 
      # Parse the provided URL into six components according to the general structure of a URL
      url = urlparse(url)
 
      # Add the network location to the bootstrap servers list
      if url.netloc:
-         bootstrap_servers = [url.netloc] + bootstrap_servers
+         brokers = [url.netloc] + brokers
 
      # Get the topic value from the parsed url
      query_dict = parse_qs(url.query)
@@ -49,7 +54,7 @@ def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list
      else:
          topic = url.path
      topic = topic.lstrip("/")
-     return topic, bootstrap_servers
+     return topic, brokers
 
 
  def upload_tarball(source_dir, target, secrets=None):
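With the reworked helper, brokers may be given as a list or as a comma-separated string, and any broker in the URL's netloc is prepended. For example (host names are placeholders):

from mlrun.datastore.utils import parse_kafka_url

topic, brokers = parse_kafka_url(
    "kafka://broker-1:9092/my-topic", "broker-2:9092,broker-3:9092"
)
print(topic)    # "my-topic"
print(brokers)  # ["broker-1:9092", "broker-2:9092", "broker-3:9092"]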
mlrun/datastore/v3io.py CHANGED
@@ -12,8 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import mmap
- import os
  import time
  from datetime import datetime
 
@@ -22,7 +20,6 @@ import v3io
  from v3io.dataplane.response import HttpResponseError
 
  import mlrun
- from mlrun.datastore.helpers import ONE_GB, ONE_MB
 
  from ..platforms.iguazio import parse_path, split_path
  from .base import (
@@ -32,6 +29,7 @@ from .base import (
  )
 
  V3IO_LOCAL_ROOT = "v3io"
+ V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 100
 
 
  class V3ioStore(DataStore):
@@ -98,46 +96,28 @@ class V3ioStore(DataStore):
          )
          return self._sanitize_storage_options(res)
 
-     def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
+     def _upload(
+         self,
+         key: str,
+         src_path: str,
+         max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
+     ):
          """helper function for upload method, allows for controlling max_chunk_size in testing"""
          container, path = split_path(self._join(key))
-         file_size = os.path.getsize(src_path)  # in bytes
-         if file_size <= ONE_MB:
-             with open(src_path, "rb") as source_file:
-                 data = source_file.read()
-             self._do_object_request(
-                 self.object.put,
-                 container=container,
-                 path=path,
-                 body=data,
-                 append=False,
-             )
-             return
-         # chunk must be a multiple of the ALLOCATIONGRANULARITY
-         # https://docs.python.org/3/library/mmap.html
-         if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
-             # round down to the nearest multiple of ALLOCATIONGRANULARITY
-             max_chunk_size -= residue
-
          with open(src_path, "rb") as file_obj:
-             file_offset = 0
-             while file_offset < file_size:
-                 chunk_size = min(file_size - file_offset, max_chunk_size)
-                 with mmap.mmap(
-                     file_obj.fileno(),
-                     length=chunk_size,
-                     access=mmap.ACCESS_READ,
-                     offset=file_offset,
-                 ) as mmap_obj:
-                     append = file_offset != 0
-                     self._do_object_request(
-                         self.object.put,
-                         container=container,
-                         path=path,
-                         body=mmap_obj,
-                         append=append,
-                     )
-                 file_offset += chunk_size
+             append = False
+             while True:
+                 data = memoryview(file_obj.read(max_chunk_size))
+                 if not data:
+                     break
+                 self._do_object_request(
+                     self.object.put,
+                     container=container,
+                     path=path,
+                     body=data,
+                     append=append,
+                 )
+                 append = True
 
      def upload(self, key, src_path):
          return self._upload(key, src_path)
@@ -152,19 +132,16 @@
              num_bytes=size,
          ).body
 
-     def _put(self, key, data, append=False, max_chunk_size: int = ONE_GB):
+     def _put(
+         self,
+         key,
+         data,
+         append=False,
+         max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
+     ):
          """helper function for put method, allows for controlling max_chunk_size in testing"""
          container, path = split_path(self._join(key))
          buffer_size = len(data)  # in bytes
-         if buffer_size <= ONE_MB:
-             self._do_object_request(
-                 self.object.put,
-                 container=container,
-                 path=path,
-                 body=data,
-                 append=append,
-             )
-             return
          buffer_offset = 0
          try:
              data = memoryview(data)