mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (42)
  1. mlrun/alerts/alert.py +30 -27
  2. mlrun/common/schemas/alert.py +3 -0
  3. mlrun/common/schemas/notification.py +1 -0
  4. mlrun/datastore/alibaba_oss.py +2 -2
  5. mlrun/datastore/azure_blob.py +6 -3
  6. mlrun/datastore/base.py +1 -1
  7. mlrun/datastore/dbfs_store.py +2 -2
  8. mlrun/datastore/google_cloud_storage.py +83 -20
  9. mlrun/datastore/s3.py +2 -2
  10. mlrun/datastore/sources.py +54 -0
  11. mlrun/datastore/targets.py +9 -53
  12. mlrun/db/httpdb.py +6 -1
  13. mlrun/errors.py +8 -0
  14. mlrun/execution.py +7 -0
  15. mlrun/feature_store/api.py +5 -0
  16. mlrun/feature_store/retrieval/job.py +1 -0
  17. mlrun/model.py +24 -3
  18. mlrun/model_monitoring/api.py +9 -0
  19. mlrun/model_monitoring/applications/_application_steps.py +36 -0
  20. mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
  21. mlrun/model_monitoring/controller.py +15 -11
  22. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +5 -5
  23. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
  24. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +35 -7
  25. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -1
  26. mlrun/model_monitoring/helpers.py +16 -17
  27. mlrun/model_monitoring/stream_processing.py +2 -3
  28. mlrun/projects/pipelines.py +19 -30
  29. mlrun/projects/project.py +69 -51
  30. mlrun/run.py +8 -6
  31. mlrun/runtimes/__init__.py +4 -0
  32. mlrun/runtimes/nuclio/api_gateway.py +9 -0
  33. mlrun/runtimes/nuclio/application/application.py +112 -54
  34. mlrun/runtimes/nuclio/function.py +1 -1
  35. mlrun/utils/helpers.py +33 -2
  36. mlrun/utils/version/version.json +2 -2
  37. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/METADATA +8 -11
  38. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/RECORD +42 -42
  39. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/WHEEL +1 -1
  40. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/LICENSE +0 -0
  41. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/entry_points.txt +0 -0
  42. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py CHANGED
@@ -62,6 +62,7 @@ class AlertConfig(ModelObj):
 
  # create an alert on endpoint_id, which will be triggered to slack if there is a "data_drift_detected" event
  # 3 times in the next hour.
+
  from mlrun.alerts import AlertConfig
  import mlrun.common.schemas.alert as alert_objects
 
@@ -94,29 +95,29 @@ class AlertConfig(ModelObj):
  )
  project.store_alert_config(alert_data)
 
- :param project: name of the project to associate the alert with
- :param name: name of the alert
- :param template: optional parameter that allows to create an alert based on a predefined template.
- you can pass either an AlertTemplate object or a string (the template name).
- if a template is used, many fields of the alert will be auto-generated based on the
- template. however, you still need to provide the following fields:
+ :param project: Name of the project to associate the alert with
+ :param name: Name of the alert
+ :param template: Optional parameter that allows creating an alert based on a predefined template.
+ You can pass either an AlertTemplate object or a string (the template name).
+ If a template is used, many fields of the alert will be auto-generated based on the
+ template.However, you still need to provide the following fields:
  `name`, `project`, `entity`, `notifications`
- :param description: description of the alert
- :param summary: summary of the alert, will be sent in the generated notifications
- :param severity: severity of the alert
- :param trigger: the events that will trigger this alert, may be a simple trigger based on events or
+ :param description: Description of the alert
+ :param summary: Summary of the alert, will be sent in the generated notifications
+ :param severity: Severity of the alert
+ :param trigger: The events that will trigger this alert, may be a simple trigger based on events or
  complex trigger which is based on a prometheus alert
- :param criteria: when the alert will be triggered based on the specified number of events within the
+ :param criteria: When the alert will be triggered based on the specified number of events within the
  defined time period.
- :param reset_policy: when to clear the alert. May be "manual" for manual reset of the alert, or
+ :param reset_policy: When to clear the alert. May be "manual" for manual reset of the alert, or
  "auto" if the criteria contains a time period
- :param notifications: list of notifications to invoke once the alert is triggered
- :param entities: entities that the event relates to. The entity object will contain fields that uniquely
- identify a given entity in the system
- :param id: internal id of the alert (user should not supply it)
- :param state: state of the alert, may be active/inactive (user should not supply it)
- :param created: when the alert is created (user should not supply it)
- :param count: internal counter of the alert (user should not supply it)
+ :param notifications: List of notifications to invoke once the alert is triggered
+ :param entities: Entities that the event relates to. The entity object will contain fields that
+ uniquely identify a given entity in the system
+ :param id: Internal id of the alert (user should not supply it)
+ :param state: State of the alert, may be active/inactive (user should not supply it)
+ :param created: When the alert is created (user should not supply it)
+ :param count: Internal counter of the alert (user should not supply it)
  """
  self.project = project
  self.name = name
@@ -137,8 +138,8 @@ class AlertConfig(ModelObj):
  self._apply_template(template)
 
  def validate_required_fields(self):
- if not self.project or not self.name:
- raise mlrun.errors.MLRunBadRequestError("Project and name must be provided")
+ if not self.name:
+ raise mlrun.errors.MLRunInvalidArgumentError("Alert name must be provided")
 
  def _serialize_field(
  self, struct: dict, field_name: str = None, strip: bool = False
@@ -237,9 +238,11 @@ class AlertConfig(ModelObj):
  db = mlrun.get_run_db()
  template = db.get_alert_template(template)
 
- # Extract parameters from the template and apply them to the AlertConfig object
- self.summary = template.summary
- self.severity = template.severity
- self.criteria = template.criteria
- self.trigger = template.trigger
- self.reset_policy = template.reset_policy
+ # Apply parameters from the template to the AlertConfig object only if they are not already specified by the
+ # user in the current configuration.
+ # User-provided parameters will take precedence over corresponding template values
+ self.summary = self.summary or template.summary
+ self.severity = self.severity or template.severity
+ self.criteria = self.criteria or template.criteria
+ self.trigger = self.trigger or template.trigger
+ self.reset_policy = self.reset_policy or template.reset_policy
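
Note on the behavior change above: template values no longer overwrite fields the caller set explicitly. A minimal usage sketch (not part of the diff; the template name is illustrative):

  import mlrun.common.schemas.alert as alert_objects
  from mlrun.alerts import AlertConfig

  # severity is set explicitly, so it is kept; summary, criteria, trigger and
  # reset_policy are left unset and are therefore filled in from the template
  alert = AlertConfig(
      project="my-project",
      name="endpoint-drift",
      template="drift-detection",  # illustrative template name
      severity=alert_objects.AlertSeverity.HIGH,
  )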
mlrun/common/schemas/alert.py CHANGED
@@ -23,6 +23,7 @@ from mlrun.common.types import StrEnum
 
  class EventEntityKind(StrEnum):
  MODEL_ENDPOINT_RESULT = "model-endpoint-result"
+ MODEL_MONITORING_APPLICATION = "model-monitoring-application"
  JOB = "job"
 
 
@@ -43,6 +44,7 @@ class EventKind(StrEnum):
  SYSTEM_PERFORMANCE_SUSPECTED = "system_performance_suspected"
  MM_APP_ANOMALY_DETECTED = "mm_app_anomaly_detected"
  MM_APP_ANOMALY_SUSPECTED = "mm_app_anomaly_suspected"
+ MM_APP_FAILED = "mm_app_failed"
  FAILED = "failed"
 
 
@@ -57,6 +59,7 @@ _event_kind_entity_map = {
  EventKind.SYSTEM_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
  EventKind.MM_APP_ANOMALY_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
  EventKind.MM_APP_ANOMALY_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+ EventKind.MM_APP_FAILED: [EventEntityKind.MODEL_MONITORING_APPLICATION],
  EventKind.FAILED: [EventEntityKind.JOB],
  }
 
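For orientation, a hedged sketch of how the new event kind pairs with the new entity kind per _event_kind_entity_map (not part of the diff; the application name is illustrative):

  import mlrun.common.schemas.alert as alert_objects

  # an alert on MM_APP_FAILED should target a model-monitoring-application entity
  entities = alert_objects.EventEntities(
      kind=alert_objects.EventEntityKind.MODEL_MONITORING_APPLICATION,
      project="my-project",
      ids=["my-monitoring-app"],  # illustrative application name
  )
  trigger = alert_objects.AlertTrigger(events=[alert_objects.EventKind.MM_APP_FAILED])
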
mlrun/common/schemas/notification.py CHANGED
@@ -52,6 +52,7 @@ class NotificationLimits(enum.Enum):
  class Notification(pydantic.BaseModel):
  """
  Notification object schema
+
  :param kind: notification implementation kind - slack, webhook, etc.
  :param name: for logging and identification
  :param message: message content in the notification
mlrun/datastore/alibaba_oss.py CHANGED
@@ -22,7 +22,7 @@ from fsspec.registry import get_filesystem_class
 
  import mlrun.errors
 
- from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 
 
  class OSSStore(DataStore):
@@ -53,7 +53,7 @@ class OSSStore(DataStore):
  except ImportError as exc:
  raise ImportError("ALIBABA ossfs not installed") from exc
  filesystem_class = get_filesystem_class(protocol=self.kind)
- self._filesystem = makeDatastoreSchemaSanitizer(
+ self._filesystem = make_datastore_schema_sanitizer(
  filesystem_class,
  using_bucket=self.using_bucket,
  **self.get_storage_options(),
mlrun/datastore/azure_blob.py CHANGED
@@ -22,7 +22,7 @@ from fsspec.registry import get_filesystem_class
 
  import mlrun.errors
 
- from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 
  # Azure blobs will be represented with the following URL: az://<container name>. The storage account is already
  # pointed to by the connection string, so the user is not expected to specify it in any way.
@@ -41,6 +41,9 @@ class AzureBlobStore(DataStore):
  self._service_client = None
  self._storage_options = None
 
+ def get_storage_options(self):
+ return self.storage_options
+
  @property
  def storage_options(self):
  if not self._storage_options:
@@ -75,7 +78,7 @@ class AzureBlobStore(DataStore):
  if not self._filesystem:
  # in order to support az and wasbs kinds
  filesystem_class = get_filesystem_class(protocol=self.kind)
- self._filesystem = makeDatastoreSchemaSanitizer(
+ self._filesystem = make_datastore_schema_sanitizer(
  filesystem_class,
  using_bucket=self.using_bucket,
  blocksize=self.max_blocksize,
@@ -225,7 +228,7 @@ class AzureBlobStore(DataStore):
 
  def get_spark_options(self):
  res = {}
- st = self.storage_options()
+ st = self.storage_options
  service = "blob"
  primary_url = None
  if st.get("connection_string"):
mlrun/datastore/base.py CHANGED
@@ -748,7 +748,7 @@ class HttpStore(DataStore):
  # As an example, it converts an S3 URL 's3://s3bucket/path' to just 's3bucket/path'.
  # Since 'ds' schemas are not inherently processed by fsspec, we have adapted the _strip_protocol()
  # method specifically to strip away the 'ds' schema as required.
- def makeDatastoreSchemaSanitizer(cls, using_bucket=False, *args, **kwargs):
+ def make_datastore_schema_sanitizer(cls, using_bucket=False, *args, **kwargs):
  if not issubclass(cls, fsspec.AbstractFileSystem):
  raise ValueError("Class must be a subclass of fsspec.AbstractFileSystem")
 
mlrun/datastore/dbfs_store.py CHANGED
@@ -19,7 +19,7 @@ from fsspec.registry import get_filesystem_class
 
  import mlrun.errors
 
- from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 
 
  class DatabricksFileBugFixed(DatabricksFile):
@@ -89,7 +89,7 @@ class DBFSStore(DataStore):
  """return fsspec file system object, if supported"""
  filesystem_class = get_filesystem_class(protocol=self.kind)
  if not self._filesystem:
- self._filesystem = makeDatastoreSchemaSanitizer(
+ self._filesystem = make_datastore_schema_sanitizer(
  cls=filesystem_class,
  using_bucket=False,
  **self.get_storage_options(),
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -12,44 +12,82 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  import json
+ import os
  from pathlib import Path
 
  from fsspec.registry import get_filesystem_class
+ from google.auth.credentials import Credentials
+ from google.cloud.storage import Client, transfer_manager
+ from google.oauth2 import service_account
 
  import mlrun.errors
  from mlrun.utils import logger
 
- from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 
  # Google storage objects will be represented with the following URL: gcs://<bucket name>/<path> or gs://...
 
 
  class GoogleCloudStorageStore(DataStore):
  using_bucket = True
+ workers = 8
+ chunk_size = 32 * 1024 * 1024
 
  def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
  super().__init__(parent, name, schema, endpoint, secrets=secrets)
+ self._storage_client = None
+ self._storage_options = None
+
+ @property
+ def storage_client(self):
+ if self._storage_client:
+ return self._storage_client
+
+ token = self._get_credentials().get("token")
+ access = "https://www.googleapis.com/auth/devstorage.full_control"
+ if isinstance(token, str):
+ if os.path.exists(token):
+ credentials = service_account.Credentials.from_service_account_file(
+ token, scopes=[access]
+ )
+ else:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ "gcsfs authentication file not found!"
+ )
+ elif isinstance(token, dict):
+ credentials = service_account.Credentials.from_service_account_info(
+ token, scopes=[access]
+ )
+ elif isinstance(token, Credentials):
+ credentials = token
+ else:
+ raise ValueError(f"Unsupported token type: {type(token)}")
+ self._storage_client = Client(credentials=credentials)
+ return self._storage_client
 
  @property
  def filesystem(self):
  """return fsspec file system object, if supported"""
- if self._filesystem:
- return self._filesystem
- try:
- import gcsfs # noqa
- except ImportError as exc:
- raise ImportError(
- "Google gcsfs not installed, run pip install gcsfs"
- ) from exc
- filesystem_class = get_filesystem_class(protocol=self.kind)
- self._filesystem = makeDatastoreSchemaSanitizer(
- filesystem_class,
- using_bucket=self.using_bucket,
- **self.get_storage_options(),
- )
+ if not self._filesystem:
+ filesystem_class = get_filesystem_class(protocol=self.kind)
+ self._filesystem = make_datastore_schema_sanitizer(
+ filesystem_class,
+ using_bucket=self.using_bucket,
+ **self.storage_options,
+ )
  return self._filesystem
 
- def get_storage_options(self):
+ @property
+ def storage_options(self):
+ if self._storage_options:
+ return self._storage_options
+ credentials = self._get_credentials()
+ # due to caching problem introduced in gcsfs 2024.3.1 (ML-7636)
+ credentials["use_listings_cache"] = False
+ self._storage_options = credentials
+ return self._storage_options
+
+ def _get_credentials(self):
  credentials = self._get_secret_or_env(
  "GCP_CREDENTIALS"
  ) or self._get_secret_or_env("GOOGLE_APPLICATION_CREDENTIALS")
@@ -71,6 +109,9 @@ class GoogleCloudStorageStore(DataStore):
  )
  return self._sanitize_storage_options(None)
 
+ def get_storage_options(self):
+ return self.storage_options
+
  def _make_path(self, key):
  key = key.strip("/")
  path = Path(self.endpoint, key).as_posix()
@@ -103,8 +144,29 @@ class GoogleCloudStorageStore(DataStore):
  f.write(data)
 
  def upload(self, key, src_path):
- path = self._make_path(key)
- self.filesystem.put_file(src_path, path, overwrite=True)
+ file_size = os.path.getsize(src_path)
+ united_path = self._make_path(key)
+
+ # Multiple upload limitation recommendations as described in
+ # https://cloud.google.com/storage/docs/multipart-uploads#storage-upload-object-chunks-python
+
+ if file_size <= self.chunk_size:
+ self.filesystem.put_file(src_path, united_path, overwrite=True)
+ return
+
+ bucket = self.storage_client.bucket(self.endpoint)
+ blob = bucket.blob(key.strip("/"))
+
+ try:
+ transfer_manager.upload_chunks_concurrently(
+ src_path, blob, chunk_size=self.chunk_size, max_workers=self.workers
+ )
+ except Exception as upload_chunks_concurrently_exception:
+ logger.warning(
+ f"gcs: failed to concurrently upload {src_path},"
+ f" exception: {upload_chunks_concurrently_exception}. Retrying with single part upload."
+ )
+ self.filesystem.put_file(src_path, united_path, overwrite=True)
 
  def stat(self, key):
  path = self._make_path(key)
@@ -133,12 +195,13 @@
 
  def rm(self, path, recursive=False, maxdepth=None):
  path = self._make_path(path)
+ # in order to raise an error in case of a connection error (ML-7056)
  self.filesystem.exists(path)
- self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+ super().rm(path, recursive=recursive, maxdepth=maxdepth)
 
  def get_spark_options(self):
  res = {}
- st = self.get_storage_options()
+ st = self._get_credentials()
  if "token" in st:
  res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
  if isinstance(st["token"], str):
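
Hedged usage sketch of the new upload path (not part of the diff; bucket and file paths are illustrative): files at or below chunk_size (32 MiB) still go through fsspec's put_file, while larger files are uploaded in concurrent chunks via google-cloud-storage's transfer_manager, with a single-part retry on failure.

  import mlrun

  # both calls route through GoogleCloudStorageStore.upload(); the store picks
  # single-part vs. chunked concurrent upload based on the local file size
  small = mlrun.get_dataitem("gcs://my-bucket/artifacts/config.yaml")
  small.upload("/tmp/config.yaml")   # <= 32 MiB: filesystem.put_file()

  large = mlrun.get_dataitem("gcs://my-bucket/artifacts/model.pkl")
  large.upload("/tmp/model.pkl")     # > 32 MiB: transfer_manager.upload_chunks_concurrently()
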
mlrun/datastore/s3.py CHANGED
@@ -20,7 +20,7 @@ from fsspec.registry import get_filesystem_class
 
  import mlrun.errors
 
- from .base import DataStore, FileStats, get_range, makeDatastoreSchemaSanitizer
+ from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitizer
 
 
  class S3Store(DataStore):
@@ -126,7 +126,7 @@ class S3Store(DataStore):
  except ImportError as exc:
  raise ImportError("AWS s3fs not installed") from exc
  filesystem_class = get_filesystem_class(protocol=self.kind)
- self._filesystem = makeDatastoreSchemaSanitizer(
+ self._filesystem = make_datastore_schema_sanitizer(
  filesystem_class,
  using_bucket=self.using_bucket,
  **self.get_storage_options(),
mlrun/datastore/sources.py CHANGED
@@ -32,6 +32,7 @@ from mlrun.config import config
  from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
  from mlrun.datastore.utils import transform_list_filters_to_tuple
  from mlrun.secrets import SecretsStore
+ from mlrun.utils import logger
 
  from ..model import DataSource
  from ..platforms.iguazio import parse_path
@@ -1163,6 +1164,59 @@ class KafkaSource(OnlineSource):
  "to a Spark dataframe is not possible, as this operation is not supported by Spark"
  )
 
+ def create_topics(
+ self,
+ num_partitions: int = 4,
+ replication_factor: int = 1,
+ topics: list[str] = None,
+ ):
+ """
+ Create Kafka topics with the specified number of partitions and replication factor.
+
+ :param num_partitions: number of partitions for the topics
+ :param replication_factor: replication factor for the topics
+ :param topics: list of topic names to create, if None,
+ the topics will be taken from the source attributes
+ """
+ from kafka.admin import KafkaAdminClient, NewTopic
+
+ brokers = self.attributes.get("brokers")
+ if not brokers:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ "brokers must be specified in the KafkaSource attributes"
+ )
+ topics = topics or self.attributes.get("topics")
+ if not topics:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ "topics must be specified in the KafkaSource attributes"
+ )
+ new_topics = [
+ NewTopic(topic, num_partitions, replication_factor) for topic in topics
+ ]
+ kafka_admin = KafkaAdminClient(
+ bootstrap_servers=brokers,
+ sasl_mechanism=self.attributes.get("sasl", {}).get("sasl_mechanism"),
+ sasl_plain_username=self.attributes.get("sasl", {}).get("username"),
+ sasl_plain_password=self.attributes.get("sasl", {}).get("password"),
+ sasl_kerberos_service_name=self.attributes.get("sasl", {}).get(
+ "sasl_kerberos_service_name", "kafka"
+ ),
+ sasl_kerberos_domain_name=self.attributes.get("sasl", {}).get(
+ "sasl_kerberos_domain_name"
+ ),
+ sasl_oauth_token_provider=self.attributes.get("sasl", {}).get("mechanism"),
+ )
+ try:
+ kafka_admin.create_topics(new_topics)
+ finally:
+ kafka_admin.close()
+ logger.info(
+ "Kafka topics created successfully",
+ topics=topics,
+ num_partitions=num_partitions,
+ replication_factor=replication_factor,
+ )
+
 
  class SQLSource(BaseSourceDriver):
  kind = "sqldb"
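
Hedged usage sketch of the new helper (not part of the diff; broker and topic names are illustrative, and kafka-python must be installed):

  from mlrun.datastore.sources import KafkaSource

  source = KafkaSource(
      brokers=["kafka-broker:9092"],
      topics=["model-monitoring-events"],
  )
  # creates the source's topics via KafkaAdminClient, reusing any SASL settings
  # stored in the source attributes
  source.create_topics(num_partitions=4, replication_factor=1)
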
mlrun/datastore/targets.py CHANGED
@@ -390,6 +390,7 @@ class BaseStoreTarget(DataTargetBase):
  is_offline = False
  support_spark = False
  support_storey = False
+ support_pandas = False
  support_append = False
 
  def __init__(
@@ -758,6 +759,8 @@
  **kwargs,
  ):
  """return the target data as dataframe"""
+ if not self.support_pandas:
+ raise NotImplementedError()
  mlrun.utils.helpers.additional_filters_warning(
  additional_filters, self.__class__
  )
@@ -819,6 +822,7 @@ class ParquetTarget(BaseStoreTarget):
  support_spark = True
  support_storey = True
  support_dask = True
+ support_pandas = True
  support_append = True
 
  def __init__(
@@ -1084,6 +1088,7 @@ class CSVTarget(BaseStoreTarget):
  is_offline = True
  support_spark = True
  support_storey = True
+ support_pandas = True
 
  @staticmethod
  def _write_dataframe(df, storage_options, target_path, partition_cols, **kwargs):
@@ -1292,7 +1297,7 @@ class SnowflakeTarget(BaseStoreTarget):
  **kwargs,
  ):
  raise mlrun.errors.MLRunRuntimeError(
- f"{type(self).__name__} does not support storey engine"
+ f"{type(self).__name__} does not support pandas engine"
  )
 
  @property
@@ -1366,19 +1371,6 @@ class NoSqlBaseTarget(BaseStoreTarget):
  def get_dask_options(self):
  return {"format": "csv"}
 
- def as_df(
- self,
- columns=None,
- df_module=None,
- entities=None,
- start_time=None,
- end_time=None,
- time_column=None,
- additional_filters=None,
- **kwargs,
- ):
- raise NotImplementedError()
-
  def write_dataframe(
  self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
  ):
@@ -1612,19 +1604,6 @@ class StreamTarget(BaseStoreTarget):
  **self.attributes,
  )
 
- def as_df(
- self,
- columns=None,
- df_module=None,
- entities=None,
- start_time=None,
- end_time=None,
- time_column=None,
- additional_filters=None,
- **kwargs,
- ):
- raise NotImplementedError()
-
 
  class KafkaTarget(BaseStoreTarget):
  """
@@ -1727,19 +1706,6 @@ class KafkaTarget(BaseStoreTarget):
  **attributes,
  )
 
- def as_df(
- self,
- columns=None,
- df_module=None,
- entities=None,
- start_time=None,
- end_time=None,
- time_column=None,
- additional_filters=None,
- **kwargs,
- ):
- raise NotImplementedError()
-
  def purge(self):
  pass
 
@@ -1784,19 +1750,6 @@ class TSDBTarget(BaseStoreTarget):
  **self.attributes,
  )
 
- def as_df(
- self,
- columns=None,
- df_module=None,
- entities=None,
- start_time=None,
- end_time=None,
- time_column=None,
- additional_filters=None,
- **kwargs,
- ):
- raise NotImplementedError()
-
  def write_dataframe(
  self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
  ):
@@ -1834,6 +1787,7 @@ class CustomTarget(BaseStoreTarget):
  is_online = False
  support_spark = False
  support_storey = True
+ support_pandas = True
 
  def __init__(
  self,
@@ -1869,6 +1823,7 @@
  class DFTarget(BaseStoreTarget):
  kind = TargetTypes.dataframe
  support_storey = True
+ support_pandas = True
 
  def __init__(self, *args, name="dataframe", **kwargs):
  self._df = None
@@ -1931,6 +1886,7 @@ class SQLTarget(BaseStoreTarget):
  is_online = True
  support_spark = False
  support_storey = True
+ support_pandas = True
 
  def __init__(
  self,
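
In short, reading a target back as a DataFrame is now gated by a single support_pandas class flag instead of per-class as_df() overrides. A small sketch of the resulting behavior (not part of the diff):

  from mlrun.datastore.targets import ParquetTarget, StreamTarget

  assert ParquetTarget.support_pandas is True   # as_df() proceeds as before
  assert StreamTarget.support_pandas is False   # as_df() now raises NotImplementedError via the base class
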
mlrun/db/httpdb.py CHANGED
@@ -3475,7 +3475,7 @@ class HTTPRunDB(RunDBInterface):
  if response.status_code == http.HTTPStatus.ACCEPTED:
  if delete_resources:
  logger.info(
- "Model Monitoring is being disable",
+ "Model Monitoring is being disabled",
  project_name=project,
  )
  if delete_user_applications:
@@ -4216,6 +4216,9 @@
  :param project: The project that the alert belongs to.
  :returns: The created/modified alert.
  """
+ if not alert_data:
+ raise mlrun.errors.MLRunInvalidArgumentError("Alert data must be provided")
+
  project = project or config.default_project
  endpoint_path = f"projects/{project}/alerts/{alert_name}"
  error_message = f"put alert {project}/alerts/{alert_name}"
@@ -4224,6 +4227,8 @@
  if isinstance(alert_data, AlertConfig)
  else AlertConfig.from_dict(alert_data)
  )
+ # Validation is necessary here because users can directly invoke this function
+ # through `mlrun.get_run_db().store_alert_config()`.
  alert_instance.validate_required_fields()
 
  alert_data = alert_instance.to_dict()
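
A hedged sketch of the effect on direct client calls (not part of the diff; keyword usage abbreviated, see the method signature for the full parameter list):

  import mlrun

  db = mlrun.get_run_db()
  try:
      db.store_alert_config("my-alert", alert_data=None)  # empty alert data is now rejected up front
  except mlrun.errors.MLRunInvalidArgumentError:
      pass
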
mlrun/errors.py CHANGED
@@ -209,6 +209,14 @@ class MLRunInvalidMMStoreType(MLRunHTTPStatusError, ValueError):
  error_status_code = HTTPStatus.BAD_REQUEST.value
 
 
+ class MLRunStreamConnectionFailure(MLRunHTTPStatusError, ValueError):
+ error_status_code = HTTPStatus.BAD_REQUEST.value
+
+
+ class MLRunTSDBConnectionFailure(MLRunHTTPStatusError, ValueError):
+ error_status_code = HTTPStatus.BAD_REQUEST.value
+
+
  class MLRunRetryExhaustedError(Exception):
  pass
 
mlrun/execution.py CHANGED
@@ -921,6 +921,13 @@ class MLClientCtx:
  updates, self._uid, self.project, iter=self._iteration
  )
 
+ def get_notifications(self):
+ """Get the list of notifications"""
+ return [
+ mlrun.model.Notification.from_dict(notification)
+ for notification in self._notifications
+ ]
+
  def to_dict(self):
  """Convert the run context to a dictionary"""
 
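A hedged sketch of how the new accessor might be used inside a job handler (not part of the diff; the handler itself is illustrative):

  def handler(context):
      # each item is an mlrun.model.Notification built from the run spec
      for notification in context.get_notifications():
          context.logger.info(f"run notification: {notification.name} ({notification.kind})")
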
mlrun/feature_store/api.py CHANGED
@@ -230,6 +230,11 @@ def _get_offline_features(
  "entity_timestamp_column param "
  "can not be specified without entity_rows param"
  )
+ if isinstance(target, BaseStoreTarget) and not target.support_pandas:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ f"get_offline_features does not support targets that do not support pandas engine."
+ f" Target kind: {target.kind}"
+ )
 
  if isinstance(feature_vector, FeatureVector):
  update_stats = True
mlrun/feature_store/retrieval/job.py CHANGED
@@ -181,6 +181,7 @@ class RemoteVectorResponse:
  file_format = kwargs.get("format")
  if not file_format:
  file_format = self.run.status.results["target"]["kind"]
+
  df = mlrun.get_dataitem(self.target_uri).as_df(
  columns=columns, df_module=df_module, format=file_format, **kwargs
  )