mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (107)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/base.py +0 -31
  3. mlrun/artifacts/document.py +6 -1
  4. mlrun/artifacts/llm_prompt.py +123 -25
  5. mlrun/artifacts/manager.py +0 -5
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/common/constants.py +10 -1
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/common/model_monitoring/helpers.py +86 -0
  10. mlrun/common/schemas/__init__.py +3 -0
  11. mlrun/common/schemas/auth.py +2 -0
  12. mlrun/common/schemas/function.py +10 -0
  13. mlrun/common/schemas/hub.py +30 -18
  14. mlrun/common/schemas/model_monitoring/__init__.py +3 -0
  15. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  16. mlrun/common/schemas/model_monitoring/functions.py +14 -5
  17. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
  18. mlrun/common/schemas/pipeline.py +1 -1
  19. mlrun/common/schemas/serving.py +3 -0
  20. mlrun/common/schemas/workflow.py +3 -1
  21. mlrun/common/secrets.py +22 -1
  22. mlrun/config.py +33 -11
  23. mlrun/datastore/__init__.py +11 -3
  24. mlrun/datastore/azure_blob.py +162 -47
  25. mlrun/datastore/datastore.py +9 -4
  26. mlrun/datastore/datastore_profile.py +61 -5
  27. mlrun/datastore/model_provider/huggingface_provider.py +363 -0
  28. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  29. mlrun/datastore/model_provider/model_provider.py +230 -65
  30. mlrun/datastore/model_provider/openai_provider.py +295 -42
  31. mlrun/datastore/s3.py +24 -2
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +47 -19
  35. mlrun/db/httpdb.py +120 -56
  36. mlrun/db/nopdb.py +38 -10
  37. mlrun/execution.py +70 -19
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +15 -0
  43. mlrun/model.py +24 -3
  44. mlrun/model_monitoring/__init__.py +1 -0
  45. mlrun/model_monitoring/api.py +66 -27
  46. mlrun/model_monitoring/applications/__init__.py +1 -1
  47. mlrun/model_monitoring/applications/base.py +509 -117
  48. mlrun/model_monitoring/applications/context.py +2 -4
  49. mlrun/model_monitoring/applications/results.py +4 -7
  50. mlrun/model_monitoring/controller.py +239 -101
  51. mlrun/model_monitoring/db/_schedules.py +116 -33
  52. mlrun/model_monitoring/db/_stats.py +4 -3
  53. mlrun/model_monitoring/db/tsdb/base.py +100 -9
  54. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
  55. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
  56. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  57. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  58. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
  59. mlrun/model_monitoring/helpers.py +54 -9
  60. mlrun/model_monitoring/stream_processing.py +45 -14
  61. mlrun/model_monitoring/writer.py +220 -1
  62. mlrun/platforms/__init__.py +3 -2
  63. mlrun/platforms/iguazio.py +7 -3
  64. mlrun/projects/operations.py +6 -1
  65. mlrun/projects/pipelines.py +46 -26
  66. mlrun/projects/project.py +166 -58
  67. mlrun/run.py +94 -17
  68. mlrun/runtimes/__init__.py +18 -0
  69. mlrun/runtimes/base.py +14 -6
  70. mlrun/runtimes/daskjob.py +7 -0
  71. mlrun/runtimes/local.py +5 -2
  72. mlrun/runtimes/mounts.py +20 -2
  73. mlrun/runtimes/mpijob/abstract.py +6 -0
  74. mlrun/runtimes/mpijob/v1.py +6 -0
  75. mlrun/runtimes/nuclio/__init__.py +1 -0
  76. mlrun/runtimes/nuclio/application/application.py +149 -17
  77. mlrun/runtimes/nuclio/function.py +76 -27
  78. mlrun/runtimes/nuclio/serving.py +97 -15
  79. mlrun/runtimes/pod.py +234 -21
  80. mlrun/runtimes/remotesparkjob.py +6 -0
  81. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  82. mlrun/runtimes/utils.py +49 -11
  83. mlrun/secrets.py +54 -13
  84. mlrun/serving/__init__.py +2 -0
  85. mlrun/serving/remote.py +79 -6
  86. mlrun/serving/routers.py +23 -41
  87. mlrun/serving/server.py +320 -80
  88. mlrun/serving/states.py +725 -157
  89. mlrun/serving/steps.py +62 -0
  90. mlrun/serving/system_steps.py +200 -119
  91. mlrun/serving/v2_serving.py +9 -10
  92. mlrun/utils/helpers.py +288 -88
  93. mlrun/utils/logger.py +3 -1
  94. mlrun/utils/notifications/notification/base.py +18 -0
  95. mlrun/utils/notifications/notification/git.py +2 -4
  96. mlrun/utils/notifications/notification/slack.py +2 -4
  97. mlrun/utils/notifications/notification/webhook.py +2 -5
  98. mlrun/utils/notifications/notification_pusher.py +1 -1
  99. mlrun/utils/retryer.py +15 -2
  100. mlrun/utils/version/version.json +2 -2
  101. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
  102. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
  103. mlrun/api/schemas/__init__.py +0 -259
  104. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
  105. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
  106. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
  107. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/datastore/azure_blob.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Iguazio
+# Copyright 2025 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import contextlib
 import time
 from pathlib import Path
 from typing import Optional
@@ -30,6 +31,40 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 
 
 class AzureBlobStore(DataStore):
+    """
+    Azure Blob Storage datastore implementation.
+
+    Supports multiple URL schemas: az://, wasbs://, wasb://
+
+    Supported Connection String Formats:
+    ====================================
+
+    1. Account Key (Standard):
+       "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net"
+
+    2. SAS Token:
+       "BlobEndpoint=https://<account>.blob.core.windows.net/;SharedAccessSignature=<sas_token>"
+
+    3. Minimal BlobEndpoint:
+       "BlobEndpoint=https://<account>.blob.core.windows.net/;AccountName=<account>;AccountKey=<key>"
+
+    4. Custom Domain:
+       "BlobEndpoint=https://<account>.mydomain.com/;AccountName=<account>;AccountKey=<key>"
+
+    5. China/Government Cloud:
+       "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.chinacloudapi.cn"
+
+    6. Full Service Endpoints with SAS:
+       "BlobEndpoint=https://<account>.blob.core.windows.net/;QueueEndpoint=...;SharedAccessSignature=<sas>"
+
+    Authentication Methods:
+    ======================
+    - Account Key (connection_string or storage_options)
+    - SAS Token (connection_string or storage_options)
+    - OAuth/Azure AD (storage_options: client_id, client_secret, tenant_id)
+
+    """
+
     using_bucket = True
     max_concurrency = 100
     max_blocksize = 1024 * 1024 * 4
@@ -40,6 +75,12 @@ class AzureBlobStore(DataStore):
     def __init__(
         self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
     ):
+        # Extract container from WASBS endpoint before calling super()
+        self._container_from_endpoint = None
+        if schema in ["wasbs", "wasb"] and endpoint and "@" in endpoint:
+            # Handle container@host format
+            self._container_from_endpoint, endpoint = endpoint.split("@", 1)
+
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._service_client = None
         self._storage_options = None
@@ -67,6 +108,34 @@ class AzureBlobStore(DataStore):
             or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
            credential=self._get_secret_or_env("credential"),
        )
+        # Use container extracted from WASBS endpoint during initialization
+        if self._container_from_endpoint:
+            res["container"] = self._container_from_endpoint
+
+        # For az:// URLs, endpoint contains the container name
+        if not res.get("container") and self.kind in ["az"]:
+            if container := getattr(self, "endpoint", None):
+                res["container"] = container
+
+        # Last resort: For wasbs:// without container, check if connection string has BlobEndpoint with container
+        if not res.get("container") and self.kind in ["wasbs", "wasb"]:
+            connection_string = res.get("connection_string")
+            if connection_string and "BlobEndpoint=" in connection_string:
+                # Try to extract container from BlobEndpoint URL
+                for part in connection_string.split(";"):
+                    if part.startswith("BlobEndpoint="):
+                        blob_endpoint = part.split("=", 1)[1]
+                        # Parse URL to get path component
+                        from urllib.parse import urlparse
+
+                        parsed = urlparse(blob_endpoint)
+                        if parsed.path and parsed.path.strip("/"):
+                            # Extract first path segment as container
+                            path_parts = parsed.path.strip("/").split("/")
+                            if path_parts[0]:
+                                res["container"] = path_parts[0]
+                        break
+
         self._storage_options = self._sanitize_options(res)
         return self._storage_options
 
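Taken together, the __init__ and storage_options changes above give container resolution a fixed precedence: a container parsed from a wasbs/wasb "container@host" endpoint wins, then the az:// endpoint itself, then the first path segment of a BlobEndpoint embedded in the connection string. A standalone sketch of that order, using only the stdlib (the resolve_container helper is illustrative, not part of mlrun):

from typing import Optional
from urllib.parse import urlparse


def resolve_container(
    kind: str,
    endpoint: str,
    connection_string: Optional[str] = None,
) -> Optional[str]:
    """Illustrative re-implementation of the container precedence above."""
    # 1. wasbs://container@host - the part before '@' is the container
    if kind in ("wasbs", "wasb") and "@" in endpoint:
        return endpoint.split("@", 1)[0]
    # 2. az://container - the endpoint itself is the container
    if kind == "az" and endpoint:
        return endpoint
    # 3. Last resort: first path segment of BlobEndpoint in the connection string
    if connection_string and "BlobEndpoint=" in connection_string:
        for part in connection_string.split(";"):
            if part.startswith("BlobEndpoint="):
                parsed = urlparse(part.split("=", 1)[1])
                segments = parsed.path.strip("/").split("/")
                if segments and segments[0]:
                    return segments[0]
    return None


assert resolve_container("wasbs", "mycontainer@myacct.blob.core.windows.net") == "mycontainer"
assert resolve_container("az", "mycontainer") == "mycontainer"
assert (
    resolve_container(
        "wasbs",
        "myacct.blob.core.windows.net",
        "BlobEndpoint=https://myacct.blob.core.windows.net/mycontainer;AccountKey=x",
    )
    == "mycontainer"
)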
@@ -165,7 +234,18 @@ class AzureBlobStore(DataStore):
         # if called without passing dataitem - like in fset.purge_targets,
         # key will include schema.
         if not schema:
-            key = Path(self.endpoint, key).as_posix()
+            # For wasbs/wasb, the filesystem is scoped to the container, so we need to use
+            # the container name as the base path, not the hostname endpoint.
+            # For az://, endpoint already contains the container name.
+            if self.kind in ["wasbs", "wasb"]:
+                container = self.storage_options.get("container")
+                if container:
+                    key = Path(container, key).as_posix()
+                else:
+                    # If no container found, use endpoint (might be hostname, but better than nothing)
+                    key = Path(self.endpoint, key).as_posix()
+            else:
+                key = Path(self.endpoint, key).as_posix()
         return key
 
     def upload(self, key, src_path):
@@ -229,18 +309,27 @@ class AzureBlobStore(DataStore):
         st = self.storage_options
         service = "blob"
         primary_url = None
-        if st.get("connection_string"):
+
+        # Parse connection string (fills account_name/account_key or SAS)
+        connection_string = st.get("connection_string")
+        if connection_string:
             primary_url, _, parsed_credential = parse_connection_str(
-                st.get("connection_string"), credential=None, service=service
+                connection_string, credential=None, service=service
             )
-            for key in ["account_name", "account_key"]:
-                parsed_value = parsed_credential.get(key)
-                if parsed_value:
-                    if key in st and st[key] != parsed_value:
+
+            if isinstance(parsed_credential, str):
+                # SharedAccessSignature as raw string
+                parsed_credential = {"sas_token": parsed_credential}
+
+            for key in ["account_name", "account_key", "sas_token"]:
+                if parsed_value := parsed_credential.get(key):
+                    # Only check for conflicts if storage options has a non-empty value for this key
+                    existing_value = st.get(key)
+                    if existing_value and existing_value != parsed_value:
                         if key == "account_name":
                             raise mlrun.errors.MLRunInvalidArgumentError(
-                                f"Storage option for '{key}' is '{st[key]}',\
-                                which does not match corresponding connection string '{parsed_value}'"
+                                f"Storage option for '{key}' is '{existing_value}', "
+                                f"which does not match corresponding connection string '{parsed_value}'"
                             )
                         else:
                             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -249,57 +338,83 @@ class AzureBlobStore(DataStore):
                    st[key] = parsed_value
 
         account_name = st.get("account_name")
+        # Derive host (prefer connection string primary URL)
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
-            host = primary_url
+            # Remove any path components from the host
+            host = primary_url.split("/")[0]
         elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
         else:
+            # nothing to configure yet
            return res
 
-        if "account_key" in st:
+        host = host.rstrip("/")
+
+        # Account key (optional; WASB supports it)
+        if "account_key" in st and st["account_key"]:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
-        if "client_secret" in st or "client_id" in st or "tenant_id" in st:
-            res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"] = (
-                "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
-            )
-            if "client_id" in st:
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
-                    "client_id"
-                ]
-            if "client_secret" in st:
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.secret.{host}"] = st[
-                    "client_secret"
-                ]
-            if "tenant_id" in st:
-                tenant_id = st["tenant_id"]
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
-                    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
-                )
+        # --- WASB + SAS (container-scoped key; no provider classes needed) ---
+        if "sas_token" in st and st["sas_token"]:
+            sas = st["sas_token"].lstrip("?")
 
-        if "sas_token" in st:
-            res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"] = (
-                "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
-            )
-            res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
+            container = st.get("container")
+
+            if container:
+                # fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
+                res[f"spark.hadoop.fs.azure.sas.{container}.{host}"] = sas
+
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Container name is required for WASB SAS. "
+                    "Set self.endpoint or storage_options['container']."
+                )
         return res
 
     @property
     def spark_url(self):
-        spark_options = self.get_spark_options()
-        url = f"wasbs://{self.endpoint}"
-        prefix = "spark.hadoop.fs.azure.account.key."
-        if spark_options:
-            for key in spark_options:
-                if key.startswith(prefix):
-                    account_key = key[len(prefix) :]
-                    if not url.endswith(account_key):
-                        url += f"@{account_key}"
-                    break
-        return url
+        # Build: wasbs://<container>@<host>
+        st = self.storage_options
+        service = "blob"
+
+        container = st.get("container")
+
+        if not container:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Container name is required to build the WASB URL. "
+                "Set storage_options['container'] or use datastore profile with container specified."
+            )
+
+        # Prefer host from connection string; else synthesize from account_name
+        host = None
+        account_name = st.get("account_name")
+        connection_string = st.get("connection_string")
+
+        if connection_string:
+            with contextlib.suppress(Exception):
+                primary_url, _, _ = parse_connection_str(
+                    connection_string, credential=None, service=service
+                )
+                if primary_url.startswith("http://"):
+                    primary_url = primary_url[len("http://") :]
+                if primary_url.startswith("https://"):
+                    primary_url = primary_url[len("https://") :]
+                # Remove any path components from the host
+                host = primary_url.split("/")[0].rstrip("/")
+        if not host and account_name:
+            host = f"{account_name}.{service}.core.windows.net"
+
+        # For wasbs:// URLs where endpoint is already the host
+        if not host and self.kind in ["wasbs", "wasb"] and hasattr(self, "endpoint"):
+            host = getattr(self, "endpoint", None)
+
+        if not host:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "account_name is required (or provide a connection_string) to build the WASB URL."
+            )
+
+        return f"wasbs://{container}@{host}"
mlrun/datastore/datastore.py
@@ -38,6 +38,8 @@ from ..utils import DB_SCHEMA, RunKeys
 from .base import DataItem, DataStore, HttpStore
 from .filestore import FileStore
 from .inmem import InMemoryStore
+from .model_provider.huggingface_provider import HuggingFaceProvider
+from .model_provider.mock_model_provider import MockModelProvider
 from .model_provider.openai_provider import OpenAIProvider
 from .store_resources import get_store_resource, is_store_uri
 from .v3io import V3ioStore
@@ -102,8 +104,11 @@ def schema_to_store(schema) -> DataStore.__subclasses__():
 def schema_to_model_provider(
     schema: str, raise_missing_schema_exception=True
 ) -> type[ModelProvider]:
-    # TODO add hugging face and http
-    schema_dict = {"openai": OpenAIProvider}
+    schema_dict = {
+        "openai": OpenAIProvider,
+        "huggingface": HuggingFaceProvider,
+        "mock": MockModelProvider,
+    }
     provider_class = schema_dict.get(schema, None)
     if not provider_class:
         if raise_missing_schema_exception:
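The provider lookup stays a plain schema-to-class dictionary, now covering huggingface and mock alongside openai. A minimal sketch of the same dispatch pattern (the provider classes and error type below are stand-ins, not the mlrun implementations):

class OpenAIProvider: ...
class HuggingFaceProvider: ...
class MockModelProvider: ...

_SCHEMA_TO_PROVIDER = {
    "openai": OpenAIProvider,
    "huggingface": HuggingFaceProvider,
    "mock": MockModelProvider,
}

def schema_to_model_provider(schema: str, raise_missing_schema_exception: bool = True):
    # Dictionary dispatch: unknown schemas either raise or return None
    provider_class = _SCHEMA_TO_PROVIDER.get(schema)
    if not provider_class and raise_missing_schema_exception:
        raise ValueError(f"Unknown model provider schema: {schema}")
    return provider_class

assert schema_to_model_provider("mock") is MockModelProvider
assert schema_to_model_provider("http", raise_missing_schema_exception=False) is None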
@@ -247,7 +252,7 @@ class StoreManager:
 
         if schema == "ds":
             datastore_profile = datastore_profile_read(url, project_name, secrets)
-            secrets = merge(secrets or {}, datastore_profile.secrets() or {})
+            secrets = merge({}, secrets or {}, datastore_profile.secrets() or {})
             url = datastore_profile.url(subpath)
             schema, endpoint, parsed_url = parse_url(url)
             subpath = parsed_url.path
@@ -281,7 +286,7 @@ class StoreManager:
             endpoint, subpath
         )
         remote_client = remote_client_class(
-            self, schema, cache_key, parsed_url.netloc, secrets=secrets, **kwargs
+            self, schema, cache_key, endpoint, secrets=secrets, **kwargs
         )
         if not secrets and not mlrun.config.is_running_as_api():
             cache[cache_key] = remote_client
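The merge change is subtle but real: mergedeep.merge mutates its first argument in place, so merging into a fresh dict keeps the caller's secrets mapping untouched while preserving the same override order. A short demonstration with made-up keys:

# pip install mergedeep - demonstrates why the destination argument matters
from mergedeep import merge

caller_secrets = {"AWS_ACCESS_KEY_ID": "caller"}
profile_secrets = {"AWS_SECRET_ACCESS_KEY": "profile"}

# Old form, merge(caller_secrets, profile_secrets), would write the profile
# keys into caller_secrets itself, leaking them back to the caller.
merged = merge({}, caller_secrets, profile_secrets)

assert merged == {"AWS_ACCESS_KEY_ID": "caller", "AWS_SECRET_ACCESS_KEY": "profile"}
assert caller_secrets == {"AWS_ACCESS_KEY_ID": "caller"}  # caller's dict unchanged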
mlrun/datastore/datastore_profile.py
@@ -19,6 +19,7 @@ import typing
 from urllib.parse import ParseResult, urlparse
 
 import pydantic.v1
+from deprecated import deprecated
 from mergedeep import merge
 
 import mlrun
@@ -138,6 +139,15 @@ class ConfigProfile(DatastoreProfile):
         return res
 
 
+# TODO: Remove in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason=(
+        "This class is deprecated from mlrun 1.10.0, and will be removed in 1.12.0. "
+        "Use `DatastoreProfileKafkaStream` instead."
+    ),
+    category=FutureWarning,
+)
 class DatastoreProfileKafkaTarget(DatastoreProfile):
     type: str = pydantic.v1.Field("kafka_target")
     _private_attributes = "kwargs_private"
@@ -158,8 +168,8 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
         return attributes
 
 
-class DatastoreProfileKafkaSource(DatastoreProfile):
-    type: str = pydantic.v1.Field("kafka_source")
+class DatastoreProfileKafkaStream(DatastoreProfile):
+    type: str = pydantic.v1.Field("kafka_stream")
     _private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
     brokers: typing.Union[str, list[str]]
     topics: typing.Union[str, list[str]]
@@ -198,6 +208,19 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
     return attributes
 
 
+# TODO: Remove in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason=(
+        "This class is deprecated from mlrun 1.10.0, and will be removed in 1.12.0. "
+        "Use `DatastoreProfileKafkaStream` instead."
+    ),
+    category=FutureWarning,
+)
+class DatastoreProfileKafkaSource(DatastoreProfileKafkaStream):
+    type: str = pydantic.v1.Field("kafka_source")
+
+
 class DatastoreProfileV3io(DatastoreProfile):
     type: str = pydantic.v1.Field("v3io")
     v3io_access_key: typing.Optional[str] = None
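Both Kafka profile deprecations rely on the deprecated package (PyPI: Deprecated) with category=FutureWarning, so constructing the old classes still works but warns, and DatastoreProfileKafkaSource becomes a thin alias subclass of DatastoreProfileKafkaStream. A minimal sketch of the same pattern, using illustrative Old/New classes rather than the mlrun ones:

# pip install Deprecated - the decorator used in the diff above
import warnings

from deprecated import deprecated


class New:
    pass


@deprecated(
    version="1.10.0",
    reason="Use `New` instead.",
    category=FutureWarning,
)
class Old(New):
    pass


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    Old()  # instantiating the deprecated class emits the FutureWarning

assert caught and issubclass(caught[0].category, FutureWarning)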
@@ -232,7 +255,7 @@ class DatastoreProfileS3(DatastoreProfile):
         if self.secret_key:
             res["AWS_SECRET_ACCESS_KEY"] = self.secret_key
         if self.endpoint_url:
-            res["S3_ENDPOINT_URL"] = self.endpoint_url
+            res["AWS_ENDPOINT_URL_S3"] = self.endpoint_url
         if self.force_non_anonymous:
             res["S3_NON_ANONYMOUS"] = self.force_non_anonymous
         if self.profile_name:
@@ -333,7 +356,9 @@ class DatastoreProfileGCS(DatastoreProfile):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.bucket:
-            return f"gcs://{self.bucket}/{subpath}"
+            return (
+                f"gcs://{self.bucket}/{subpath}" if subpath else f"gcs://{self.bucket}"
+            )
         else:
             return f"gcs://{subpath}"
 
@@ -370,7 +395,11 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
             # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.container:
-            return f"az://{self.container}/{subpath}"
+            return (
+                f"az://{self.container}/{subpath}"
+                if subpath
+                else f"az://{self.container}"
+            )
         else:
             return f"az://{subpath}"
 
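The GCS and Azure url() fixes stop emitting a trailing slash when the subpath is empty. A tiny standalone check of the new behavior (the helper below mirrors the fixed logic; the bucket and path names are made up):

def az_url(container: str, subpath: str) -> str:
    # Mirrors DatastoreProfileAzureBlob.url() after the fix
    subpath = subpath[1:] if subpath.startswith("/") else subpath
    if container:
        return f"az://{container}/{subpath}" if subpath else f"az://{container}"
    return f"az://{subpath}"

assert az_url("mycontainer", "/data/file.csv") == "az://mycontainer/data/file.csv"
assert az_url("mycontainer", "") == "az://mycontainer"  # no trailing slash anymore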
@@ -486,6 +515,31 @@ class OpenAIProfile(DatastoreProfile):
         return f"{self.type}://{subpath.lstrip('/')}"
 
 
+class HuggingFaceProfile(DatastoreProfile):
+    type: str = pydantic.v1.Field("huggingface")
+    _private_attributes = ("token", "model_kwargs")
+    task: typing.Optional[str] = None
+    token: typing.Optional[str] = None
+    device: typing.Optional[typing.Union[int, str]] = None
+    device_map: typing.Union[str, dict[str, typing.Union[int, str]], None] = None
+    trust_remote_code: bool = None
+    model_kwargs: typing.Optional[dict[str, typing.Any]] = None
+
+    def secrets(self) -> dict:
+        keys = {
+            "HF_TASK": self.task,
+            "HF_TOKEN": self.token,
+            "HF_DEVICE": self.device,
+            "HF_DEVICE_MAP": self.device_map,
+            "HF_TRUST_REMOTE_CODE": self.trust_remote_code,
+            "HF_MODEL_KWARGS": self.model_kwargs,
+        }
+        return {k: v for k, v in keys.items() if v}
+
+    def url(self, subpath):
+        return f"{self.type}://{subpath.lstrip('/')}"
+
+
 _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
     "v3io": DatastoreProfileV3io,
     "s3": DatastoreProfileS3,
@@ -493,6 +547,7 @@ _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
     "basic": DatastoreProfileBasic,
     "kafka_target": DatastoreProfileKafkaTarget,
     "kafka_source": DatastoreProfileKafkaSource,
+    "kafka_stream": DatastoreProfileKafkaStream,
     "dbfs": DatastoreProfileDBFS,
     "gcs": DatastoreProfileGCS,
     "az": DatastoreProfileAzureBlob,
@@ -500,6 +555,7 @@ _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
     "taosws": DatastoreProfileTDEngine,
     "config": ConfigProfile,
     "openai": OpenAIProfile,
+    "huggingface": HuggingFaceProfile,
 }
 
 
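Note that HuggingFaceProfile.secrets() keeps only truthy fields, so unset options never reach the environment; genuinely falsy values such as device=0 or trust_remote_code=False are filtered out as well. A self-contained illustration of that filter with hypothetical values (plain dict, no pydantic):

# Illustrates the `if v` truthiness filter used by HuggingFaceProfile.secrets()
fields = {
    "HF_TASK": "text-generation",
    "HF_TOKEN": None,               # unset -> dropped
    "HF_DEVICE": 0,                 # falsy int -> also dropped by `if v`
    "HF_TRUST_REMOTE_CODE": False,  # falsy bool -> also dropped
}

secrets = {k: v for k, v in fields.items() if v}
assert secrets == {"HF_TASK": "text-generation"}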