mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/alerts/alert.py +30 -27
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/notification.py +1 -0
- mlrun/datastore/alibaba_oss.py +2 -2
- mlrun/datastore/azure_blob.py +6 -3
- mlrun/datastore/base.py +1 -1
- mlrun/datastore/dbfs_store.py +2 -2
- mlrun/datastore/google_cloud_storage.py +83 -20
- mlrun/datastore/s3.py +2 -2
- mlrun/datastore/sources.py +54 -0
- mlrun/datastore/targets.py +9 -53
- mlrun/db/httpdb.py +6 -1
- mlrun/errors.py +8 -0
- mlrun/execution.py +7 -0
- mlrun/feature_store/api.py +5 -0
- mlrun/feature_store/retrieval/job.py +1 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/api.py +9 -0
- mlrun/model_monitoring/applications/_application_steps.py +36 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
- mlrun/model_monitoring/controller.py +15 -11
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +5 -5
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +35 -7
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -1
- mlrun/model_monitoring/helpers.py +16 -17
- mlrun/model_monitoring/stream_processing.py +2 -3
- mlrun/projects/pipelines.py +19 -30
- mlrun/projects/project.py +69 -51
- mlrun/run.py +8 -6
- mlrun/runtimes/__init__.py +4 -0
- mlrun/runtimes/nuclio/api_gateway.py +9 -0
- mlrun/runtimes/nuclio/application/application.py +112 -54
- mlrun/runtimes/nuclio/function.py +1 -1
- mlrun/utils/helpers.py +33 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/METADATA +8 -11
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/RECORD +42 -42
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py
CHANGED
@@ -62,6 +62,7 @@ class AlertConfig(ModelObj):

         # create an alert on endpoint_id, which will be triggered to slack if there is a "data_drift_detected" event
         # 3 times in the next hour.
+
         from mlrun.alerts import AlertConfig
         import mlrun.common.schemas.alert as alert_objects

@@ -94,29 +95,29 @@ class AlertConfig(ModelObj):
         )
         project.store_alert_config(alert_data)

-        :param project:
-        :param name:
-        :param template:
-
-
-            template.
+        :param project: Name of the project to associate the alert with
+        :param name: Name of the alert
+        :param template: Optional parameter that allows creating an alert based on a predefined template.
+            You can pass either an AlertTemplate object or a string (the template name).
+            If a template is used, many fields of the alert will be auto-generated based on the
+            template.However, you still need to provide the following fields:
             `name`, `project`, `entity`, `notifications`
-        :param description:
-        :param summary:
-        :param severity:
-        :param trigger:
+        :param description: Description of the alert
+        :param summary: Summary of the alert, will be sent in the generated notifications
+        :param severity: Severity of the alert
+        :param trigger: The events that will trigger this alert, may be a simple trigger based on events or
             complex trigger which is based on a prometheus alert
-        :param criteria:
+        :param criteria: When the alert will be triggered based on the specified number of events within the
             defined time period.
-        :param reset_policy:
+        :param reset_policy: When to clear the alert. May be "manual" for manual reset of the alert, or
             "auto" if the criteria contains a time period
-        :param notifications:
-        :param entities:
-            identify a given entity in the system
-        :param id:
-        :param state:
-        :param created:
-        :param count:
+        :param notifications: List of notifications to invoke once the alert is triggered
+        :param entities: Entities that the event relates to. The entity object will contain fields that
+            uniquely identify a given entity in the system
+        :param id: Internal id of the alert (user should not supply it)
+        :param state: State of the alert, may be active/inactive (user should not supply it)
+        :param created: When the alert is created (user should not supply it)
+        :param count: Internal counter of the alert (user should not supply it)
         """
         self.project = project
         self.name = name
@@ -137,8 +138,8 @@ class AlertConfig(ModelObj):
         self._apply_template(template)

     def validate_required_fields(self):
-        if not self.
-            raise mlrun.errors.
+        if not self.name:
+            raise mlrun.errors.MLRunInvalidArgumentError("Alert name must be provided")

     def _serialize_field(
         self, struct: dict, field_name: str = None, strip: bool = False
@@ -237,9 +238,11 @@ class AlertConfig(ModelObj):
         db = mlrun.get_run_db()
         template = db.get_alert_template(template)

-        #
-
-
-        self.
-        self.
-        self.
+        # Apply parameters from the template to the AlertConfig object only if they are not already specified by the
+        # user in the current configuration.
+        # User-provided parameters will take precedence over corresponding template values
+        self.summary = self.summary or template.summary
+        self.severity = self.severity or template.severity
+        self.criteria = self.criteria or template.criteria
+        self.trigger = self.trigger or template.trigger
+        self.reset_policy = self.reset_policy or template.reset_policy
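The expanded docstring above documents the template flow: template-derived fields (summary, severity, criteria, trigger, reset_policy) are applied only when the user has not set them, while name, project, entity and notifications must still be supplied. A minimal sketch of that flow, following the MLRun alert schemas; the project name, endpoint id, template name and Slack webhook are placeholders:

    import mlrun
    import mlrun.common.schemas.alert as alert_objects
    from mlrun.alerts import AlertConfig

    project = mlrun.get_or_create_project("my-project")  # placeholder project

    alert_data = AlertConfig(
        project="my-project",
        name="drift-alert",
        # assumed pre-defined template name; an AlertTemplate object would also work
        template="drift-detection",
        entities=alert_objects.EventEntities(
            kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
            project="my-project",
            ids=["<endpoint-id>"],  # placeholder entity id
        ),
        notifications=[
            alert_objects.AlertNotification(
                notification=mlrun.model.Notification(
                    kind="slack",
                    name="drift-to-slack",
                    secret_params={"webhook": "<slack-webhook-url>"},  # placeholder
                )
            )
        ],
    )
    # summary, severity, criteria, trigger and reset_policy fall back to the template values
    project.store_alert_config(alert_data)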
mlrun/common/schemas/alert.py
CHANGED
@@ -23,6 +23,7 @@ from mlrun.common.types import StrEnum

 class EventEntityKind(StrEnum):
     MODEL_ENDPOINT_RESULT = "model-endpoint-result"
+    MODEL_MONITORING_APPLICATION = "model-monitoring-application"
     JOB = "job"


@@ -43,6 +44,7 @@ class EventKind(StrEnum):
     SYSTEM_PERFORMANCE_SUSPECTED = "system_performance_suspected"
     MM_APP_ANOMALY_DETECTED = "mm_app_anomaly_detected"
     MM_APP_ANOMALY_SUSPECTED = "mm_app_anomaly_suspected"
+    MM_APP_FAILED = "mm_app_failed"
     FAILED = "failed"


@@ -57,6 +59,7 @@ _event_kind_entity_map = {
     EventKind.SYSTEM_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MM_APP_ANOMALY_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.MM_APP_ANOMALY_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.MM_APP_FAILED: [EventEntityKind.MODEL_MONITORING_APPLICATION],
     EventKind.FAILED: [EventEntityKind.JOB],
 }

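The new MM_APP_FAILED event kind is tied to the new MODEL_MONITORING_APPLICATION entity kind through _event_kind_entity_map, keeping event-to-entity validation table-driven. Purely illustrative, reading the map exactly as defined above:

    import mlrun.common.schemas.alert as alert_objects

    # an mm_app_failed event may only target a model-monitoring-application entity
    allowed = alert_objects._event_kind_entity_map[alert_objects.EventKind.MM_APP_FAILED]
    assert allowed == [alert_objects.EventEntityKind.MODEL_MONITORING_APPLICATION]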
mlrun/common/schemas/notification.py
CHANGED

@@ -52,6 +52,7 @@ class NotificationLimits(enum.Enum):
 class Notification(pydantic.BaseModel):
     """
     Notification object schema
+
     :param kind: notification implementation kind - slack, webhook, etc.
     :param name: for logging and identification
     :param message: message content in the notification
mlrun/datastore/alibaba_oss.py
CHANGED
@@ -22,7 +22,7 @@ from fsspec.registry import get_filesystem_class

 import mlrun.errors

-from .base import DataStore, FileStats,
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer


 class OSSStore(DataStore):
@@ -53,7 +53,7 @@ class OSSStore(DataStore):
         except ImportError as exc:
             raise ImportError("ALIBABA ossfs not installed") from exc
         filesystem_class = get_filesystem_class(protocol=self.kind)
-        self._filesystem =
+        self._filesystem = make_datastore_schema_sanitizer(
             filesystem_class,
             using_bucket=self.using_bucket,
             **self.get_storage_options(),
mlrun/datastore/azure_blob.py
CHANGED
@@ -22,7 +22,7 @@ from fsspec.registry import get_filesystem_class

 import mlrun.errors

-from .base import DataStore, FileStats,
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer

 # Azure blobs will be represented with the following URL: az://<container name>. The storage account is already
 # pointed to by the connection string, so the user is not expected to specify it in any way.
@@ -41,6 +41,9 @@ class AzureBlobStore(DataStore):
         self._service_client = None
         self._storage_options = None

+    def get_storage_options(self):
+        return self.storage_options
+
     @property
     def storage_options(self):
         if not self._storage_options:
@@ -75,7 +78,7 @@ class AzureBlobStore(DataStore):
         if not self._filesystem:
             # in order to support az and wasbs kinds
             filesystem_class = get_filesystem_class(protocol=self.kind)
-            self._filesystem =
+            self._filesystem = make_datastore_schema_sanitizer(
                 filesystem_class,
                 using_bucket=self.using_bucket,
                 blocksize=self.max_blocksize,
@@ -225,7 +228,7 @@ class AzureBlobStore(DataStore):

     def get_spark_options(self):
         res = {}
-        st = self.storage_options
+        st = self.storage_options
         service = "blob"
         primary_url = None
         if st.get("connection_string"):
mlrun/datastore/base.py
CHANGED
@@ -748,7 +748,7 @@ class HttpStore(DataStore):
 # As an example, it converts an S3 URL 's3://s3bucket/path' to just 's3bucket/path'.
 # Since 'ds' schemas are not inherently processed by fsspec, we have adapted the _strip_protocol()
 # method specifically to strip away the 'ds' schema as required.
-def
+def make_datastore_schema_sanitizer(cls, using_bucket=False, *args, **kwargs):
     if not issubclass(cls, fsspec.AbstractFileSystem):
         raise ValueError("Class must be a subclass of fsspec.AbstractFileSystem")

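Every datastore in this release now builds its fsspec filesystem through the renamed snake_case helper instead of the old camelCase one. A sketch of that shared pattern, assembled from the hunks in this diff; the store class itself is hypothetical:

    import fsspec
    from fsspec.registry import get_filesystem_class

    from mlrun.datastore.base import DataStore, make_datastore_schema_sanitizer


    class MyStore(DataStore):
        # hypothetical store, shown only to illustrate the shared construction pattern
        @property
        def filesystem(self) -> fsspec.AbstractFileSystem:
            if not self._filesystem:
                filesystem_class = get_filesystem_class(protocol=self.kind)
                # wrap the fsspec class so custom "ds://" style schemas are stripped from paths
                self._filesystem = make_datastore_schema_sanitizer(
                    filesystem_class,
                    using_bucket=self.using_bucket,
                    **self.get_storage_options(),
                )
            return self._filesystem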
mlrun/datastore/dbfs_store.py
CHANGED
@@ -19,7 +19,7 @@ from fsspec.registry import get_filesystem_class

 import mlrun.errors

-from .base import DataStore, FileStats,
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer


 class DatabricksFileBugFixed(DatabricksFile):
@@ -89,7 +89,7 @@ class DBFSStore(DataStore):
         """return fsspec file system object, if supported"""
         filesystem_class = get_filesystem_class(protocol=self.kind)
         if not self._filesystem:
-            self._filesystem =
+            self._filesystem = make_datastore_schema_sanitizer(
                 cls=filesystem_class,
                 using_bucket=False,
                 **self.get_storage_options(),
mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -12,44 +12,82 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
+import os
 from pathlib import Path

 from fsspec.registry import get_filesystem_class
+from google.auth.credentials import Credentials
+from google.cloud.storage import Client, transfer_manager
+from google.oauth2 import service_account

 import mlrun.errors
 from mlrun.utils import logger

-from .base import DataStore, FileStats,
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer

 # Google storage objects will be represented with the following URL: gcs://<bucket name>/<path> or gs://...


 class GoogleCloudStorageStore(DataStore):
     using_bucket = True
+    workers = 8
+    chunk_size = 32 * 1024 * 1024

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
+        self._storage_client = None
+        self._storage_options = None
+
+    @property
+    def storage_client(self):
+        if self._storage_client:
+            return self._storage_client
+
+        token = self._get_credentials().get("token")
+        access = "https://www.googleapis.com/auth/devstorage.full_control"
+        if isinstance(token, str):
+            if os.path.exists(token):
+                credentials = service_account.Credentials.from_service_account_file(
+                    token, scopes=[access]
+                )
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "gcsfs authentication file not found!"
+                )
+        elif isinstance(token, dict):
+            credentials = service_account.Credentials.from_service_account_info(
+                token, scopes=[access]
+            )
+        elif isinstance(token, Credentials):
+            credentials = token
+        else:
+            raise ValueError(f"Unsupported token type: {type(token)}")
+        self._storage_client = Client(credentials=credentials)
+        return self._storage_client

     @property
     def filesystem(self):
         """return fsspec file system object, if supported"""
-        if self._filesystem:
-
-
-
-
-
-
-            ) from exc
-        filesystem_class = get_filesystem_class(protocol=self.kind)
-        self._filesystem = makeDatastoreSchemaSanitizer(
-            filesystem_class,
-            using_bucket=self.using_bucket,
-            **self.get_storage_options(),
-        )
+        if not self._filesystem:
+            filesystem_class = get_filesystem_class(protocol=self.kind)
+            self._filesystem = make_datastore_schema_sanitizer(
+                filesystem_class,
+                using_bucket=self.using_bucket,
+                **self.storage_options,
+            )
         return self._filesystem

-
+    @property
+    def storage_options(self):
+        if self._storage_options:
+            return self._storage_options
+        credentials = self._get_credentials()
+        # due to caching problem introduced in gcsfs 2024.3.1 (ML-7636)
+        credentials["use_listings_cache"] = False
+        self._storage_options = credentials
+        return self._storage_options
+
+    def _get_credentials(self):
         credentials = self._get_secret_or_env(
             "GCP_CREDENTIALS"
         ) or self._get_secret_or_env("GOOGLE_APPLICATION_CREDENTIALS")
@@ -71,6 +109,9 @@ class GoogleCloudStorageStore(DataStore):
         )
         return self._sanitize_storage_options(None)

+    def get_storage_options(self):
+        return self.storage_options
+
     def _make_path(self, key):
         key = key.strip("/")
         path = Path(self.endpoint, key).as_posix()
@@ -103,8 +144,29 @@ class GoogleCloudStorageStore(DataStore):
             f.write(data)

     def upload(self, key, src_path):
-
-        self.
+        file_size = os.path.getsize(src_path)
+        united_path = self._make_path(key)
+
+        # Multiple upload limitation recommendations as described in
+        # https://cloud.google.com/storage/docs/multipart-uploads#storage-upload-object-chunks-python
+
+        if file_size <= self.chunk_size:
+            self.filesystem.put_file(src_path, united_path, overwrite=True)
+            return
+
+        bucket = self.storage_client.bucket(self.endpoint)
+        blob = bucket.blob(key.strip("/"))
+
+        try:
+            transfer_manager.upload_chunks_concurrently(
+                src_path, blob, chunk_size=self.chunk_size, max_workers=self.workers
+            )
+        except Exception as upload_chunks_concurrently_exception:
+            logger.warning(
+                f"gcs: failed to concurrently upload {src_path},"
+                f" exception: {upload_chunks_concurrently_exception}. Retrying with single part upload."
+            )
+            self.filesystem.put_file(src_path, united_path, overwrite=True)

     def stat(self, key):
         path = self._make_path(key)
@@ -133,12 +195,13 @@ class GoogleCloudStorageStore(DataStore):

     def rm(self, path, recursive=False, maxdepth=None):
         path = self._make_path(path)
+        # in order to raise an error in case of a connection error (ML-7056)
         self.filesystem.exists(path)
-
+        super().rm(path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
         res = {}
-        st = self.
+        st = self._get_credentials()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
         if isinstance(st["token"], str):
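For context on the changes above: credentials are resolved from the GCP_CREDENTIALS or GOOGLE_APPLICATION_CREDENTIALS secret/env var (a key-file path, a JSON dict, or a google Credentials object), and uploads larger than chunk_size (32 MiB) now go through the google-cloud-storage transfer manager, falling back to a single-part put_file on failure. A hedged usage sketch; the key-file path, bucket and object paths are placeholders:

    import os

    import mlrun

    # placeholder service-account key file; a JSON credentials string via project secrets also works
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"

    item = mlrun.get_dataitem("gcs://my-bucket/models/model.pkl")  # placeholder gcs:// URL
    item.upload("local/model.pkl")  # files above ~32 MiB are uploaded in concurrent chunks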
mlrun/datastore/s3.py
CHANGED
@@ -20,7 +20,7 @@ from fsspec.registry import get_filesystem_class

 import mlrun.errors

-from .base import DataStore, FileStats, get_range,
+from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitizer


 class S3Store(DataStore):
@@ -126,7 +126,7 @@ class S3Store(DataStore):
         except ImportError as exc:
             raise ImportError("AWS s3fs not installed") from exc
         filesystem_class = get_filesystem_class(protocol=self.kind)
-        self._filesystem =
+        self._filesystem = make_datastore_schema_sanitizer(
             filesystem_class,
             using_bucket=self.using_bucket,
             **self.get_storage_options(),
mlrun/datastore/sources.py
CHANGED
@@ -32,6 +32,7 @@ from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
 from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
+from mlrun.utils import logger

 from ..model import DataSource
 from ..platforms.iguazio import parse_path
@@ -1163,6 +1164,59 @@ class KafkaSource(OnlineSource):
             "to a Spark dataframe is not possible, as this operation is not supported by Spark"
         )

+    def create_topics(
+        self,
+        num_partitions: int = 4,
+        replication_factor: int = 1,
+        topics: list[str] = None,
+    ):
+        """
+        Create Kafka topics with the specified number of partitions and replication factor.
+
+        :param num_partitions: number of partitions for the topics
+        :param replication_factor: replication factor for the topics
+        :param topics: list of topic names to create, if None,
+                       the topics will be taken from the source attributes
+        """
+        from kafka.admin import KafkaAdminClient, NewTopic
+
+        brokers = self.attributes.get("brokers")
+        if not brokers:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "brokers must be specified in the KafkaSource attributes"
+            )
+        topics = topics or self.attributes.get("topics")
+        if not topics:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "topics must be specified in the KafkaSource attributes"
+            )
+        new_topics = [
+            NewTopic(topic, num_partitions, replication_factor) for topic in topics
+        ]
+        kafka_admin = KafkaAdminClient(
+            bootstrap_servers=brokers,
+            sasl_mechanism=self.attributes.get("sasl", {}).get("sasl_mechanism"),
+            sasl_plain_username=self.attributes.get("sasl", {}).get("username"),
+            sasl_plain_password=self.attributes.get("sasl", {}).get("password"),
+            sasl_kerberos_service_name=self.attributes.get("sasl", {}).get(
+                "sasl_kerberos_service_name", "kafka"
+            ),
+            sasl_kerberos_domain_name=self.attributes.get("sasl", {}).get(
+                "sasl_kerberos_domain_name"
+            ),
+            sasl_oauth_token_provider=self.attributes.get("sasl", {}).get("mechanism"),
+        )
+        try:
+            kafka_admin.create_topics(new_topics)
+        finally:
+            kafka_admin.close()
+        logger.info(
+            "Kafka topics created successfully",
+            topics=topics,
+            num_partitions=num_partitions,
+            replication_factor=replication_factor,
+        )
+

 class SQLSource(BaseSourceDriver):
     kind = "sqldb"
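The new create_topics helper reads the brokers (and, optionally, the topics) from the source attributes and provisions the topics through kafka-python's admin client, so it needs the kafka-python package and a reachable broker. A minimal usage sketch; the broker address and topic name are placeholders:

    from mlrun.datastore.sources import KafkaSource

    source = KafkaSource(
        brokers=["localhost:9092"],        # placeholder broker
        topics=["model-serving-stream"],   # placeholder topic
    )
    # creates the topics listed on the source with the requested partitioning
    source.create_topics(num_partitions=4, replication_factor=1)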
mlrun/datastore/targets.py
CHANGED
@@ -390,6 +390,7 @@ class BaseStoreTarget(DataTargetBase):
     is_offline = False
     support_spark = False
     support_storey = False
+    support_pandas = False
     support_append = False

     def __init__(
@@ -758,6 +759,8 @@ class BaseStoreTarget(DataTargetBase):
         **kwargs,
     ):
         """return the target data as dataframe"""
+        if not self.support_pandas:
+            raise NotImplementedError()
         mlrun.utils.helpers.additional_filters_warning(
             additional_filters, self.__class__
         )
@@ -819,6 +822,7 @@ class ParquetTarget(BaseStoreTarget):
     support_spark = True
     support_storey = True
     support_dask = True
+    support_pandas = True
     support_append = True

     def __init__(
@@ -1084,6 +1088,7 @@ class CSVTarget(BaseStoreTarget):
     is_offline = True
     support_spark = True
     support_storey = True
+    support_pandas = True

     @staticmethod
     def _write_dataframe(df, storage_options, target_path, partition_cols, **kwargs):
@@ -1292,7 +1297,7 @@ class SnowflakeTarget(BaseStoreTarget):
         **kwargs,
     ):
         raise mlrun.errors.MLRunRuntimeError(
-            f"{type(self).__name__} does not support
+            f"{type(self).__name__} does not support pandas engine"
         )

     @property
@@ -1366,19 +1371,6 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def get_dask_options(self):
         return {"format": "csv"}

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1612,19 +1604,6 @@ class StreamTarget(BaseStoreTarget):
             **self.attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-

 class KafkaTarget(BaseStoreTarget):
     """
@@ -1727,19 +1706,6 @@ class KafkaTarget(BaseStoreTarget):
             **attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def purge(self):
         pass

@@ -1784,19 +1750,6 @@ class TSDBTarget(BaseStoreTarget):
             **self.attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1834,6 +1787,7 @@ class CustomTarget(BaseStoreTarget):
     is_online = False
     support_spark = False
     support_storey = True
+    support_pandas = True

     def __init__(
         self,
@@ -1869,6 +1823,7 @@ class CustomTarget(BaseStoreTarget):
 class DFTarget(BaseStoreTarget):
     kind = TargetTypes.dataframe
     support_storey = True
+    support_pandas = True

     def __init__(self, *args, name="dataframe", **kwargs):
         self._df = None
@@ -1931,6 +1886,7 @@ class SQLTarget(BaseStoreTarget):
     is_online = True
     support_spark = False
     support_storey = True
+    support_pandas = True

     def __init__(
         self,
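With the per-class support_pandas flag, the duplicated as_df overrides on the NoSQL, stream, Kafka and TSDB targets are gone: BaseStoreTarget.as_df now raises NotImplementedError for any target that cannot materialize a dataframe. A rough sketch of the resulting behavior, assuming a ParquetTarget whose (placeholder) path has already been written:

    from mlrun.datastore.targets import ParquetTarget, StreamTarget

    parquet_target = ParquetTarget(name="pq", path="v3io:///bigdata/demo.parquet")  # placeholder path
    stream_target = StreamTarget(name="stream")

    assert parquet_target.support_pandas and not stream_target.support_pandas

    df = parquet_target.as_df()  # supported: reads the written parquet data back
    try:
        stream_target.as_df()    # unsupported: the base class raises before touching the stream
    except NotImplementedError:
        pass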
mlrun/db/httpdb.py
CHANGED
@@ -3475,7 +3475,7 @@ class HTTPRunDB(RunDBInterface):
         if response.status_code == http.HTTPStatus.ACCEPTED:
             if delete_resources:
                 logger.info(
-                    "Model Monitoring is being
+                    "Model Monitoring is being disabled",
                     project_name=project,
                 )
             if delete_user_applications:
@@ -4216,6 +4216,9 @@
         :param project: The project that the alert belongs to.
         :returns: The created/modified alert.
         """
+        if not alert_data:
+            raise mlrun.errors.MLRunInvalidArgumentError("Alert data must be provided")
+
         project = project or config.default_project
         endpoint_path = f"projects/{project}/alerts/{alert_name}"
         error_message = f"put alert {project}/alerts/{alert_name}"
@@ -4224,6 +4227,8 @@
             if isinstance(alert_data, AlertConfig)
             else AlertConfig.from_dict(alert_data)
         )
+        # Validation is necessary here because users can directly invoke this function
+        # through `mlrun.get_run_db().store_alert_config()`.
         alert_instance.validate_required_fields()

         alert_data = alert_instance.to_dict()
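Because store_alert_config can be invoked directly on the DB client (not only via project.store_alert_config), the client now rejects missing alert data up front and delegates the rest to AlertConfig.validate_required_fields. Roughly, with the parameter names used in the hunk above:

    import mlrun

    db = mlrun.get_run_db()
    try:
        # fails fast on the client side instead of reaching the API
        db.store_alert_config("my-alert", alert_data=None)
    except mlrun.errors.MLRunInvalidArgumentError as err:
        print(err)  # "Alert data must be provided"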
mlrun/errors.py
CHANGED
@@ -209,6 +209,14 @@ class MLRunInvalidMMStoreType(MLRunHTTPStatusError, ValueError):
     error_status_code = HTTPStatus.BAD_REQUEST.value


+class MLRunStreamConnectionFailure(MLRunHTTPStatusError, ValueError):
+    error_status_code = HTTPStatus.BAD_REQUEST.value
+
+
+class MLRunTSDBConnectionFailure(MLRunHTTPStatusError, ValueError):
+    error_status_code = HTTPStatus.BAD_REQUEST.value
+
+
 class MLRunRetryExhaustedError(Exception):
     pass

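Both new exception types behave like the other MLRun HTTP errors and map to a 400 Bad Request, which lets model-monitoring code distinguish stream connection failures from TSDB connection failures. Illustrative only:

    import mlrun.errors

    try:
        raise mlrun.errors.MLRunTSDBConnectionFailure("could not connect to the TSDB")
    except mlrun.errors.MLRunTSDBConnectionFailure as exc:
        assert exc.error_status_code == 400  # HTTPStatus.BAD_REQUEST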
mlrun/execution.py
CHANGED
@@ -921,6 +921,13 @@ class MLClientCtx:
             updates, self._uid, self.project, iter=self._iteration
         )

+    def get_notifications(self):
+        """Get the list of notifications"""
+        return [
+            mlrun.model.Notification.from_dict(notification)
+            for notification in self._notifications
+        ]
+
     def to_dict(self):
         """Convert the run context to a dictionary"""

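get_notifications exposes the notifications attached to the run as mlrun.model.Notification objects from inside the handler. A sketch of a job handler using it; the handler itself is hypothetical:

    import mlrun


    def handler(context: mlrun.MLClientCtx):
        # list the notifications configured for this run (e.g. run(..., notifications=[...]))
        for notification in context.get_notifications():
            context.logger.info(f"notification configured: {notification.name} ({notification.kind})")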
mlrun/feature_store/api.py
CHANGED
@@ -230,6 +230,11 @@ def _get_offline_features(
             "entity_timestamp_column param "
             "can not be specified without entity_rows param"
         )
+    if isinstance(target, BaseStoreTarget) and not target.support_pandas:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"get_offline_features does not support targets that do not support pandas engine."
+            f" Target kind: {target.kind}"
+        )

     if isinstance(feature_vector, FeatureVector):
         update_stats = True
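The new guard in _get_offline_features rejects a target that cannot materialize a pandas dataframe before any retrieval work starts. Roughly, through the public feature-store API; the feature-vector URI is a placeholder, and StreamTarget is just one example of a target with support_pandas = False:

    import mlrun.errors
    import mlrun.feature_store as fstore
    from mlrun.datastore.targets import StreamTarget

    vector = fstore.get_feature_vector("store://feature-vectors/my-project/my-vector")  # placeholder URI
    try:
        vector.get_offline_features(target=StreamTarget(name="stream"))
    except mlrun.errors.MLRunInvalidArgumentError as err:
        print(err)  # the unsupported target kind is reported in the error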
mlrun/feature_store/retrieval/job.py
CHANGED

@@ -181,6 +181,7 @@ class RemoteVectorResponse:
         file_format = kwargs.get("format")
         if not file_format:
             file_format = self.run.status.results["target"]["kind"]
+
         df = mlrun.get_dataitem(self.target_uri).as_df(
             columns=columns, df_module=df_module, format=file_format, **kwargs
         )