mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (50)
  1. mlrun/artifacts/llm_prompt.py +8 -1
  2. mlrun/common/model_monitoring/helpers.py +86 -0
  3. mlrun/common/schemas/hub.py +25 -18
  4. mlrun/common/schemas/model_monitoring/constants.py +1 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
  6. mlrun/config.py +2 -3
  7. mlrun/datastore/__init__.py +2 -2
  8. mlrun/datastore/azure_blob.py +66 -43
  9. mlrun/datastore/datastore_profile.py +35 -5
  10. mlrun/datastore/model_provider/huggingface_provider.py +122 -30
  11. mlrun/datastore/model_provider/model_provider.py +62 -4
  12. mlrun/datastore/model_provider/openai_provider.py +114 -43
  13. mlrun/datastore/s3.py +24 -2
  14. mlrun/datastore/storeytargets.py +2 -3
  15. mlrun/db/base.py +15 -1
  16. mlrun/db/httpdb.py +17 -6
  17. mlrun/db/nopdb.py +14 -0
  18. mlrun/k8s_utils.py +0 -14
  19. mlrun/model_monitoring/api.py +2 -2
  20. mlrun/model_monitoring/applications/base.py +37 -10
  21. mlrun/model_monitoring/applications/context.py +1 -4
  22. mlrun/model_monitoring/controller.py +15 -5
  23. mlrun/model_monitoring/db/_schedules.py +2 -4
  24. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
  25. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  26. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
  27. mlrun/model_monitoring/helpers.py +5 -5
  28. mlrun/platforms/iguazio.py +7 -3
  29. mlrun/projects/project.py +33 -29
  30. mlrun/runtimes/base.py +0 -3
  31. mlrun/runtimes/mounts.py +15 -2
  32. mlrun/runtimes/nuclio/__init__.py +1 -0
  33. mlrun/runtimes/nuclio/application/application.py +11 -2
  34. mlrun/runtimes/nuclio/function.py +10 -0
  35. mlrun/runtimes/nuclio/serving.py +4 -0
  36. mlrun/runtimes/pod.py +153 -11
  37. mlrun/runtimes/utils.py +22 -5
  38. mlrun/serving/routers.py +23 -41
  39. mlrun/serving/server.py +26 -14
  40. mlrun/serving/states.py +3 -3
  41. mlrun/serving/system_steps.py +52 -29
  42. mlrun/serving/v2_serving.py +9 -10
  43. mlrun/utils/helpers.py +5 -2
  44. mlrun/utils/version/version.json +2 -2
  45. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/METADATA +24 -23
  46. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/RECORD +50 -50
  47. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/WHEEL +0 -0
  48. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/entry_points.txt +0 -0
  49. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/licenses/LICENSE +0 -0
  50. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/top_level.txt +0 -0
mlrun/artifacts/llm_prompt.py CHANGED
@@ -62,12 +62,19 @@ class LLMPromptArtifactSpec(ArtifactSpec):
             parent_uri=model_artifact.uri
             if isinstance(model_artifact, model_art.ModelArtifact)
             else model_artifact,
+            format=kwargs.pop("format", "") or "json",
             **kwargs,
         )
 
         self.prompt_template = prompt_template
         self.prompt_legend = prompt_legend
-        self.model_configuration = model_configuration
+        if model_configuration is not None and not isinstance(
+            model_configuration, dict
+        ):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "LLMPromptArtifact model_configuration must be a dictionary or None"
+            )
+        self.model_configuration = model_configuration or {}
         self.description = description
         self._model_artifact = (
             model_artifact
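The spec now defaults the artifact format to "json" and rejects a non-dict model_configuration. A standalone sketch of that validation rule (the helper name below is ours, not part of mlrun):

from typing import Optional


def normalize_model_configuration(model_configuration: Optional[dict]) -> dict:
    # Mirrors the new spec behavior: only a dict or None is accepted, None becomes {}.
    if model_configuration is not None and not isinstance(model_configuration, dict):
        raise ValueError(
            "LLMPromptArtifact model_configuration must be a dictionary or None"
        )
    return model_configuration or {}


print(normalize_model_configuration(None))                  # {}
print(normalize_model_configuration({"temperature": 0.2}))  # {'temperature': 0.2}
# normalize_model_configuration("gpt-4")                    # would raise ValueError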
mlrun/common/model_monitoring/helpers.py CHANGED
@@ -14,6 +14,7 @@
 
 import sys
 import typing
+from datetime import datetime
 
 import mlrun.common
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
@@ -24,6 +25,7 @@ BinCounts = typing.NewType("BinCounts", list[int])
 BinEdges = typing.NewType("BinEdges", list[float])
 
 _MAX_FLOAT = sys.float_info.max
+logger = mlrun.utils.create_logger(level="info", name="mm_helpers")
 
 
 def parse_model_endpoint_project_prefix(path: str, project_name: str):
@@ -87,3 +89,87 @@ def pad_features_hist(feature_stats: FeatureStats) -> None:
     for feature in feature_stats.values():
         if hist_key in feature:
             pad_hist(Histogram(feature[hist_key]))
+
+
+def get_model_endpoints_creation_task_status(
+    server,
+) -> tuple[
+    mlrun.common.schemas.BackgroundTaskState,
+    typing.Optional[datetime],
+    typing.Optional[set[str]],
+]:
+    background_task = None
+    background_task_state = mlrun.common.schemas.BackgroundTaskState.running
+    background_task_check_timestamp = None
+    model_endpoint_uids = None
+    try:
+        background_task = mlrun.get_run_db().get_project_background_task(
+            server.project, server.model_endpoint_creation_task_name
+        )
+        background_task_check_timestamp = mlrun.utils.now_date()
+        log_background_task_state(
+            server, background_task.status.state, background_task_check_timestamp
+        )
+        background_task_state = background_task.status.state
+    except mlrun.errors.MLRunNotFoundError:
+        logger.warning(
+            "Model endpoint creation task not found listing model endpoints",
+            project=server.project,
+            task_name=server.model_endpoint_creation_task_name,
+        )
+    if background_task is None:
+        model_endpoints = mlrun.get_run_db().list_model_endpoints(
+            project=server.project,
+            function_name=server.function_name,
+            function_tag=server.function_tag,
+            tsdb_metrics=False,
+        )
+        if model_endpoints:
+            model_endpoint_uids = {
+                endpoint.metadata.uid for endpoint in model_endpoints.endpoints
+            }
+            logger.info(
+                "Model endpoints found after background task not found, model monitoring will monitor "
+                "events",
+                project=server.project,
+                function_name=server.function_name,
+                function_tag=server.function_tag,
+                uids=model_endpoint_uids,
+            )
+            background_task_state = mlrun.common.schemas.BackgroundTaskState.succeeded
+        else:
+            logger.warning(
+                "Model endpoints not found after background task not found, model monitoring will not "
+                "monitor events",
+                project=server.project,
+                function_name=server.function_name,
+                function_tag=server.function_tag,
+            )
+            background_task_state = mlrun.common.schemas.BackgroundTaskState.failed
+    return background_task_state, background_task_check_timestamp, model_endpoint_uids
+
+
+def log_background_task_state(
+    server,
+    background_task_state: mlrun.common.schemas.BackgroundTaskState,
+    background_task_check_timestamp: typing.Optional[datetime],
+):
+    logger.info(
+        "Checking model endpoint creation task status",
+        task_name=server.model_endpoint_creation_task_name,
+    )
+    if (
+        background_task_state
+        in mlrun.common.schemas.BackgroundTaskState.terminal_states()
+    ):
+        logger.info(
+            f"Model endpoint creation task completed with state {background_task_state}"
+        )
+    else:  # in progress
+        logger.info(
+            f"Model endpoint creation task is still in progress with the current state: "
+            f"{background_task_state}. Events will not be monitored for the next "
+            f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
+            function_name=server.function.name,
+            background_task_check_timestamp=background_task_check_timestamp.isoformat(),
+        )
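The new helper reads only a handful of attributes from the serving server object and returns a (state, check timestamp, endpoint uids) tuple. A hedged usage sketch, assuming a reachable MLRun API; the stand-in server and all its values are hypothetical:

from types import SimpleNamespace

import mlrun.common.schemas
from mlrun.common.model_monitoring.helpers import (
    get_model_endpoints_creation_task_status,
)

# Stand-in exposing just the attributes the helper reads (all values hypothetical).
server = SimpleNamespace(
    project="my-project",
    model_endpoint_creation_task_name="create-model-endpoints",
    function_name="serving",
    function_tag="latest",
    function=SimpleNamespace(name="serving"),
)

state, checked_at, uids = get_model_endpoints_creation_task_status(server)
if state == mlrun.common.schemas.BackgroundTaskState.succeeded:
    print("model endpoints ready:", uids)
elif state == mlrun.common.schemas.BackgroundTaskState.failed:
    print("no model endpoints found; events will not be monitored")
else:
    print("creation task still in progress, checked at", checked_at)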
mlrun/common/schemas/hub.py CHANGED
@@ -15,6 +15,7 @@
 from datetime import datetime, timezone
 from typing import Optional
 
+import deepdiff
 from pydantic.v1 import BaseModel, Extra, Field
 
 import mlrun.common.types
@@ -36,9 +37,9 @@ class HubObjectMetadata(BaseModel):
         extra = Extra.allow
 
 
-# Currently only functions are supported. Will add more in the future.
 class HubSourceType(mlrun.common.types.StrEnum):
     functions = "functions"
+    modules = "modules"
 
 
 # Sources-related objects
@@ -46,7 +47,6 @@ class HubSourceSpec(ObjectSpec):
     path: str  # URL to base directory, should include schema (s3://, etc...)
     channel: str
    credentials: Optional[dict] = {}
-    object_type: HubSourceType = Field(HubSourceType.functions, const=True)
 
 
 class HubSource(BaseModel):
@@ -55,11 +55,11 @@ class HubSource(BaseModel):
     spec: HubSourceSpec
     status: Optional[ObjectStatus] = ObjectStatus(state="created")
 
-    def get_full_uri(self, relative_path):
-        return f"{self.spec.path}/{self.spec.object_type}/{self.spec.channel}/{relative_path}"
+    def get_full_uri(self, relative_path, object_type):
+        return f"{self.spec.path}/{object_type}/{self.spec.channel}/{relative_path}"
 
-    def get_catalog_uri(self):
-        return self.get_full_uri(mlrun.mlconf.hub.catalog_filename)
+    def get_catalog_uri(self, object_type):
+        return self.get_full_uri(mlrun.mlconf.hub.catalog_filename, object_type)
 
     @classmethod
     def generate_default_source(cls):
@@ -78,11 +78,23 @@ class HubSource(BaseModel):
             spec=HubSourceSpec(
                 path=mlrun.mlconf.hub.default_source.url,
                 channel=mlrun.mlconf.hub.default_source.channel,
-                object_type=HubSourceType(mlrun.mlconf.hub.default_source.object_type),
             ),
             status=ObjectStatus(state="created"),
         )
 
+    def diff(self, another_source: "HubSource") -> dict:
+        """
+        Compare this HubSource with another one.
+        Returns a dict of differences (metadata, spec, status).
+        """
+        exclude_paths = [
+            "root['metadata']['updated']",
+            "root['metadata']['created']",
+        ]
+        return deepdiff.DeepDiff(
+            self.dict(), another_source.dict(), exclude_paths=exclude_paths
+        )
+
 
 last_source_index = -1
@@ -94,21 +106,16 @@ class IndexedHubSource(BaseModel):
 
 # Item-related objects
 class HubItemMetadata(HubObjectMetadata):
-    source: HubSourceType = Field(HubSourceType.functions, const=True)
+    source: HubSourceType = HubSourceType.functions
     version: str
     tag: Optional[str]
 
     def get_relative_path(self) -> str:
-        if self.source == HubSourceType.functions:
-            # This is needed since the hub deployment script modifies the paths to use _ instead of -.
-            modified_name = self.name.replace("-", "_")
-            # Prefer using the tag if exists. Otherwise, use version.
-            version = self.tag or self.version
-            return f"{modified_name}/{version}/"
-        else:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                f"Bad source for hub item - {self.source}"
-            )
+        # This is needed since the hub deployment script modifies the paths to use _ instead of -.
+        modified_name = self.name.replace("-", "_")
+        # Prefer using the tag if exists. Otherwise, use version.
+        version = self.tag or self.version
+        return f"{modified_name}/{version}/"
 
 
 class HubItemSpec(ObjectSpec):
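With object_type removed from the source spec, catalog URIs are now requested per object type, and the new diff() method compares two sources while ignoring created/updated timestamps. A hedged sketch, assuming the default hub source is available; the alternative channel value is hypothetical:

from mlrun.common.schemas.hub import HubSource, HubSourceType

source = HubSource.generate_default_source()
if source:  # guard in case no default source is configured
    # object_type is now passed per call instead of living on HubSourceSpec
    print(source.get_catalog_uri(HubSourceType.functions.value))
    print(source.get_catalog_uri(HubSourceType.modules.value))

    other = source.copy(deep=True)      # pydantic v1 deep copy
    other.spec.channel = "development"  # hypothetical alternative channel
    print(source.diff(other))           # deepdiff.DeepDiff output, timestamps excluded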
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -331,6 +331,7 @@ class EndpointType(IntEnum):
 class EndpointMode(IntEnum):
     REAL_TIME = 0
     BATCH = 1
+    BATCH_LEGACY = 2  # legacy batch mode, used for endpoints created through the batch inference job
 
 
 class MonitoringFunctionNames(MonitoringStrEnum):
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -119,7 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
-    mode: EndpointMode = EndpointMode.REAL_TIME
+    mode: Optional[EndpointMode] = None
 
     @classmethod
     def mutable_fields(cls):
@@ -131,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
             return str(v)
         return v
 
+    @validator("mode", pre=True, always=True)
+    def _set_mode_based_on_endpoint_type(cls, v, values):  # noqa: N805
+        if v is None:
+            if values.get("endpoint_type") == EndpointType.BATCH_EP:
+                return EndpointMode.BATCH_LEGACY
+            else:
+                return EndpointMode.REAL_TIME
+        return v
+
 
 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     model_class: Optional[str] = ""
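Together with the BATCH_LEGACY member added above, endpoints that do not state a mode now get one derived from their endpoint type. A standalone sketch of that defaulting rule (the helper name is ours, not the pydantic validator itself):

from typing import Optional

from mlrun.common.schemas.model_monitoring.constants import EndpointMode, EndpointType


def default_mode(endpoint_type: EndpointType, mode: Optional[EndpointMode]) -> EndpointMode:
    # Mirrors the validator: explicit values win, BATCH_EP falls back to
    # BATCH_LEGACY, everything else to REAL_TIME.
    if mode is not None:
        return mode
    if endpoint_type == EndpointType.BATCH_EP:
        return EndpointMode.BATCH_LEGACY
    return EndpointMode.REAL_TIME


print(default_mode(EndpointType.BATCH_EP, None))  # EndpointMode.BATCH_LEGACY
print(default_mode(EndpointType.NODE_EP, None))   # EndpointMode.REAL_TIME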
mlrun/config.py CHANGED
@@ -718,7 +718,6 @@ default_config = {
             "name": "default",
             "description": "MLRun global function hub",
             "url": "https://mlrun.github.io/marketplace",
-            "object_type": "functions",
             "channel": "master",
         },
     },
@@ -1000,9 +999,9 @@ class Config:
         )
 
     @staticmethod
-    def get_default_hub_source() -> str:
+    def get_default_hub_source_url_prefix(object_type) -> str:
         default_source = config.hub.default_source
-        return f"{default_source.url}/{default_source.object_type}/{default_source.channel}/"
+        return f"{default_source.url}/{object_type}/{default_source.channel}/"
 
     @staticmethod
     def decode_base64_config_and_load_to_object(
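In line with the hub schema change, the default hub URL prefix is now built per object type. A short sketch, assuming the default url and channel shown in this config:

import mlrun

# e.g. "https://mlrun.github.io/marketplace/functions/master/"
print(mlrun.mlconf.get_default_hub_source_url_prefix("functions"))
# e.g. "https://mlrun.github.io/marketplace/modules/master/"
print(mlrun.mlconf.get_default_hub_source_url_prefix("modules"))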
mlrun/datastore/__init__.py CHANGED
@@ -43,7 +43,7 @@ import storey
 
 import mlrun.datastore.wasbfs
 from mlrun.datastore.datastore_profile import (
-    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaStream,
     DatastoreProfileKafkaTarget,
     DatastoreProfileV3io,
 )
@@ -123,7 +123,7 @@ def get_stream_pusher(stream_path: str, **kwargs):
     )
     if isinstance(
         datastore_profile,
-        (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+        (DatastoreProfileKafkaStream, DatastoreProfileKafkaTarget),
     ):
         attributes = datastore_profile.attributes()
         brokers = attributes.pop("brokers", None)
mlrun/datastore/azure_blob.py CHANGED
@@ -229,18 +229,25 @@ class AzureBlobStore(DataStore):
         st = self.storage_options
         service = "blob"
         primary_url = None
-        if st.get("connection_string"):
+
+        # Parse connection string (fills account_name/account_key or SAS)
+        connection_string = st.get("connection_string")
+        if connection_string:
             primary_url, _, parsed_credential = parse_connection_str(
-                st.get("connection_string"), credential=None, service=service
+                connection_string, credential=None, service=service
             )
-            for key in ["account_name", "account_key"]:
-                parsed_value = parsed_credential.get(key)
-                if parsed_value:
+
+            if isinstance(parsed_credential, str):
+                # SharedAccessSignature as raw string
+                parsed_credential = {"sas_token": parsed_credential}
+
+            for key in ["account_name", "account_key", "sas_token"]:
+                if parsed_value := parsed_credential.get(key):
                     if key in st and st[key] != parsed_value:
                         if key == "account_name":
                             raise mlrun.errors.MLRunInvalidArgumentError(
-                                f"Storage option for '{key}' is '{st[key]}',\
-                                which does not match corresponding connection string '{parsed_value}'"
+                                f"Storage option for '{key}' is '{st[key]}', "
+                                f"which does not match corresponding connection string '{parsed_value}'"
                             )
                         else:
                             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -249,6 +256,7 @@ class AzureBlobStore(DataStore):
                     st[key] = parsed_value
 
         account_name = st.get("account_name")
+        # Derive host (prefer connection string primary URL)
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
@@ -258,48 +266,63 @@ class AzureBlobStore(DataStore):
         elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
         else:
+            # nothing to configure yet
             return res
 
-        if "account_key" in st:
+        host = host.rstrip("/")
+
+        # Account key (optional; WASB supports it)
+        if "account_key" in st and st["account_key"]:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
-        if "client_secret" in st or "client_id" in st or "tenant_id" in st:
-            res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"] = (
-                "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
-            )
-            if "client_id" in st:
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
-                    "client_id"
-                ]
-            if "client_secret" in st:
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.secret.{host}"] = st[
-                    "client_secret"
-                ]
-            if "tenant_id" in st:
-                tenant_id = st["tenant_id"]
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
-                    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
-                )
+        # --- WASB + SAS (container-scoped key; no provider classes needed) ---
+        if "sas_token" in st and st["sas_token"]:
+            sas = st["sas_token"].lstrip("?")
+            if container := getattr(self, "endpoint", None) or st.get("container"):
+                # fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
+                res[f"spark.hadoop.fs.azure.sas.{container}.{host}"] = sas
 
-        if "sas_token" in st:
-            res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"] = (
-                "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
-            )
-            res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Container name is required for WASB SAS. "
+                    "Set self.endpoint or storage_options['container']."
+                )
         return res
 
     @property
     def spark_url(self):
-        spark_options = self.get_spark_options()
-        url = f"wasbs://{self.endpoint}"
-        prefix = "spark.hadoop.fs.azure.account.key."
-        if spark_options:
-            for key in spark_options:
-                if key.startswith(prefix):
-                    account_key = key[len(prefix) :]
-                    if not url.endswith(account_key):
-                        url += f"@{account_key}"
-                    break
-        return url
+        # Build: wasbs://<container>@<host>
+        st = self.storage_options
+        service = "blob"
+
+        container = getattr(self, "endpoint", None) or st.get("container")
+        if not container:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Container is required to build the WASB URL "
+                "(self.endpoint or storage_options['container'])."
+            )
+
+        # Prefer host from connection string; else synthesize from account_name
+        host = None
+        account_name = st.get("account_name")
+        connection_string = st.get("connection_string")
+
+        if connection_string:
+            primary_url, _, _ = parse_connection_str(
+                connection_string, credential=None, service=service
+            )
+            if primary_url.startswith("http://"):
+                primary_url = primary_url[len("http://") :]
+            if primary_url.startswith("https://"):
+                primary_url = primary_url[len("https://") :]
+            host = primary_url.rstrip("/")
+
+        if not host and account_name:
+            host = f"{account_name}.{service}.core.windows.net"
+
+        if not host:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "account_name is required (or provide a connection_string) to build the WASB URL."
+            )
+
+        return f"wasbs://{container}@{host}"
mlrun/datastore/datastore_profile.py CHANGED
@@ -19,6 +19,7 @@ import typing
 from urllib.parse import ParseResult, urlparse
 
 import pydantic.v1
+from deprecated import deprecated
 from mergedeep import merge
 
 import mlrun
@@ -138,6 +139,15 @@ class ConfigProfile(DatastoreProfile):
         return res
 
 
+# TODO: Remove in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason=(
+        "This class is deprecated from mlrun 1.10.0, and will be removed in 1.12.0. "
+        "Use `DatastoreProfileKafkaStream` instead."
+    ),
+    category=FutureWarning,
+)
 class DatastoreProfileKafkaTarget(DatastoreProfile):
     type: str = pydantic.v1.Field("kafka_target")
     _private_attributes = "kwargs_private"
@@ -158,8 +168,8 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
         return attributes
 
 
-class DatastoreProfileKafkaSource(DatastoreProfile):
-    type: str = pydantic.v1.Field("kafka_source")
+class DatastoreProfileKafkaStream(DatastoreProfile):
+    type: str = pydantic.v1.Field("kafka_stream")
     _private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
     brokers: typing.Union[str, list[str]]
     topics: typing.Union[str, list[str]]
@@ -198,6 +208,19 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
         return attributes
 
 
+# TODO: Remove in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason=(
+        "This class is deprecated from mlrun 1.10.0, and will be removed in 1.12.0. "
+        "Use `DatastoreProfileKafkaStream` instead."
+    ),
+    category=FutureWarning,
+)
+class DatastoreProfileKafkaSource(DatastoreProfileKafkaStream):
+    type: str = pydantic.v1.Field("kafka_source")
+
+
 class DatastoreProfileV3io(DatastoreProfile):
     type: str = pydantic.v1.Field("v3io")
     v3io_access_key: typing.Optional[str] = None
@@ -232,7 +255,7 @@ class DatastoreProfileS3(DatastoreProfile):
         if self.secret_key:
             res["AWS_SECRET_ACCESS_KEY"] = self.secret_key
         if self.endpoint_url:
-            res["S3_ENDPOINT_URL"] = self.endpoint_url
+            res["AWS_ENDPOINT_URL_S3"] = self.endpoint_url
         if self.force_non_anonymous:
             res["S3_NON_ANONYMOUS"] = self.force_non_anonymous
         if self.profile_name:
@@ -333,7 +356,9 @@ class DatastoreProfileGCS(DatastoreProfile):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.bucket:
-            return f"gcs://{self.bucket}/{subpath}"
+            return (
+                f"gcs://{self.bucket}/{subpath}" if subpath else f"gcs://{self.bucket}"
+            )
         else:
             return f"gcs://{subpath}"
 
@@ -370,7 +395,11 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
             # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.container:
-            return f"az://{self.container}/{subpath}"
+            return (
+                f"az://{self.container}/{subpath}"
+                if subpath
+                else f"az://{self.container}"
+            )
         else:
             return f"az://{subpath}"
 
@@ -518,6 +547,7 @@ _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
     "basic": DatastoreProfileBasic,
     "kafka_target": DatastoreProfileKafkaTarget,
     "kafka_source": DatastoreProfileKafkaSource,
+    "kafka_stream": DatastoreProfileKafkaStream,
     "dbfs": DatastoreProfileDBFS,
     "gcs": DatastoreProfileGCS,
     "az": DatastoreProfileAzureBlob,