mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__init__.py +24 -3
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +14 -0
- mlrun/common/model_monitoring/helpers.py +123 -0
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +14 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +12 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +34 -0
- mlrun/common/schemas/hub.py +33 -20
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +12 -15
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +69 -19
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +12 -5
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +7 -2
- mlrun/datastore/datastore_profile.py +84 -22
- mlrun/datastore/model_provider/huggingface_provider.py +225 -41
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +206 -74
- mlrun/datastore/model_provider/openai_provider.py +226 -66
- mlrun/datastore/s3.py +39 -18
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +17 -12
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +25 -6
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +63 -32
- mlrun/db/httpdb.py +373 -153
- mlrun/db/nopdb.py +54 -21
- mlrun/errors.py +4 -2
- mlrun/execution.py +66 -25
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +52 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +172 -0
- mlrun/hub/step.py +113 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +15 -7
- mlrun/launcher/local.py +4 -1
- mlrun/model.py +14 -4
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +65 -28
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +299 -128
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/controller.py +132 -58
- mlrun/model_monitoring/db/_schedules.py +38 -29
- mlrun/model_monitoring/db/_stats.py +6 -16
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +30 -6
- mlrun/model_monitoring/stream_processing.py +34 -28
- mlrun/model_monitoring/writer.py +224 -4
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -43
- mlrun/platforms/iguazio.py +8 -4
- mlrun/projects/operations.py +17 -11
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +187 -123
- mlrun/run.py +95 -21
- mlrun/runtimes/__init__.py +2 -186
- mlrun/runtimes/base.py +103 -25
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +5 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +12 -7
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +339 -40
- mlrun/runtimes/nuclio/function.py +222 -72
- mlrun/runtimes/nuclio/serving.py +132 -42
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +99 -14
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +84 -11
- mlrun/serving/routers.py +26 -44
- mlrun/serving/server.py +138 -51
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +997 -283
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +149 -95
- mlrun/serving/v2_serving.py +9 -10
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +292 -94
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +3 -5
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +3 -3
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
- mlrun/api/schemas/__init__.py +0 -259
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/config.py
CHANGED

@@ -40,6 +40,7 @@ import yaml

 import mlrun.common.constants
 import mlrun.common.schemas
+import mlrun.common.types
 import mlrun.errors

 env_prefix = "MLRUN_"

@@ -66,7 +67,6 @@ default_config = {
     "nuclio_version": "",
     "default_nuclio_runtime": "python:3.11",
     "nest_asyncio_enabled": "",  # enable import of nest_asyncio for corner cases with old jupyter, set "1"
-    "ui_url": "",  # remote/external mlrun UI url (for hyperlinks) (This is deprecated in favor of the ui block)
     "remote_host": "",
     "api_base_version": "v1",
     "version": "",  # will be set to current version

@@ -85,7 +85,9 @@ default_config = {
     "kfp_image": "mlrun/mlrun-kfp",  # image to use for KFP runner
     "dask_kfp_image": "mlrun/mlrun",  # image to use for dask KFP runner
     "igz_version": "",  # the version of the iguazio system the API is running on
-    "iguazio_api_url": "",  # the url to iguazio api
+    "iguazio_api_url": "",  # the url to iguazio api (internal / external access with priority to internal)
+    "iguazio_api_url_ingress": "",  # the url to iguazio api ingress (for external access)
+    "iguazio_api_ssl_verify": True,  # verify ssl certificate of iguazio api
     "spark_app_image": "",  # image to use for spark operator app runtime
     "spark_app_image_tag": "",  # image tag to use for spark operator app runtime
     "spark_history_server_path": "",  # spark logs directory for spark history server

@@ -107,7 +109,11 @@ default_config = {
     "submit_timeout": "280",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
     "runtimes_cleanup_interval": "300",
-
+    # disabled by default due to an internal bug in serving functions
+    # relying on a background task to hold the status for its model endpoints
+    # TODO: need to refine what/when we can delete the background tasks
+    # e.g: use labels or naming convention.
+    "background_task_cleanup_interval": "0",
     "background_task_max_age": "21600",  # 6 hours in seconds
     "monitoring": {
         "runs": {

@@ -251,7 +257,8 @@ default_config = {
         },
         "runtimes": {
             "dask": "600",
-
+            # cluster start might take some time in case k8s needs to spin up new nodes
+            "dask_cluster_start": "600",
         },
         "push_notifications": "60",
     },

@@ -299,6 +306,7 @@ default_config = {
         "application": {
             "default_sidecar_internal_port": 8050,
             "default_authentication_mode": mlrun.common.schemas.APIGatewayAuthenticationMode.none,
+            "default_worker_number": 10000,
         },
     },
     # TODO: function defaults should be moved to the function spec config above

@@ -416,11 +424,17 @@ default_config = {
            "allow_local_run": False,
        },
        "authentication": {
-            "mode": "none",  # one of none, basic, bearer, iguazio
+            "mode": "none",  # one of none, basic, bearer, iguazio, iguazio-v4
            "basic": {"username": "", "password": ""},
            "bearer": {"token": ""},
            "iguazio": {
                "session_verification_endpoint": "data_sessions/verifications/app_service",
+                "authentication_endpoint": "api/v1/authentication/refresh-access-token",
+            },
+            "service_account": {
+                # the following are the default values for k8s service accounts, but may be changed per deployment
+                "token_expiration_seconds": 600,
+                "token_path": "/var/run/secrets/kubernetes.io/serviceaccount/token",
            },
        },
        "nuclio": {

@@ -475,6 +489,10 @@ default_config = {
        },
        "authorization": {
            "mode": "none",  # one of none, opa
+            "namespaces": {
+                "resources": "",
+                "mgmt": "mgmt",
+            },
            "opa": {
                "address": "",
                "request_timeout": 10,

@@ -644,6 +662,13 @@ default_config = {
                "max_replicas": 1,
            },
        },
+        "writer_graph": {
+            "max_events": 1000,
+            "flush_after_seconds": 30,
+            "writer_version": "v2",  # v1 is the sync version while v2 is async
+            "parquet_batching_max_events": 10,
+            "parquet_batching_timeout_secs": 30,
+        },
        # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
        # stream, and endpoints.
        "store_prefixes": {

@@ -657,6 +682,15 @@ default_config = {
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
        "model_endpoint_creation_check_period": 15,
+        # TSDB (TimescaleDB) configuration
+        "tsdb": {
+            # When True, automatically create/generate database name using system_id if not explicitly
+            # specified in the connection string. When False, use the database from connection string as-is.
+            "auto_create_database": True,
+            # Connection pool timeout in seconds. This is the maximum time to wait for a connection
+            # from the pool before raising an error.
+            "connection_pool_timeout": 120,
+        },
    },
    "secret_stores": {
        # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory

@@ -712,9 +746,8 @@ default_config = {
        # Set false to avoid creating a global source (for example in a dark site)
        "create": True,
        "name": "default",
-        "description": "MLRun
+        "description": "MLRun hub",
        "url": "https://mlrun.github.io/marketplace",
-        "object_type": "functions",
        "channel": "master",
    },
 },

@@ -856,6 +889,19 @@ default_config = {
            "enabled": False,
            "request_timeout": 5,
        },
+        "auth_with_oauth_token": {
+            "enabled": False,
+            "request_timeout": 5,
+            "refresh_threshold": 0.75,
+            # Default is empty. automatically set based on configuration (end client vs jupyter vs runtime, etc)
+            # can be set manually set using envvars
+            "token_file": "",
+            # Default is empty because if set, searches for the specific token name in the file, if empty, it will look
+            # for a token named "default", if "default" does not exist, it will use the first token in the file
+            "token_name": "",
+        },
+        # a runtime computed value. Do not set it manually.
+        "auth_token_endpoint": "",
        "services": {
            # The running service name. One of: "api", "alerts"
            "service_name": "api",

@@ -953,7 +999,7 @@ class Config:
                try:
                    config_value.update(value)
                except AttributeError as exc:
-                    if not isinstance(config_value,
+                    if not isinstance(config_value, dict | Config):
                        raise ValueError(
                            f"Can not update `{key}` config. "
                            f"Expected a configuration but received {type(value)}"

@@ -996,9 +1042,9 @@ class Config:
        )

    @staticmethod
-    def
+    def get_default_hub_source_url_prefix(object_type) -> str:
        default_source = config.hub.default_source
-        return f"{default_source.url}/{
+        return f"{default_source.url}/{object_type}/{default_source.channel}/"

    @staticmethod
    def decode_base64_config_and_load_to_object(

@@ -1268,10 +1314,7 @@ class Config:

    @staticmethod
    def resolve_ui_url():
-
-        # since the config class is used in a "recursive" way, we can't use property like we used in other places
-        # since the property will need to be url, which exists in other structs as well
-        return config.ui.url or config.ui_url
+        return config.ui.url

    def is_api_running_on_k8s(self):
        # determine if the API service is attached to K8s cluster

@@ -1391,6 +1434,18 @@ class Config:
            ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
        )

+    def is_iguazio_mode(self):
+        return (
+            mlrun.mlconf.httpdb.authentication.mode
+            == mlrun.common.types.AuthenticationMode.IGUAZIO
+        )
+
+    def is_iguazio_v4_mode(self):
+        return (
+            config.httpdb.authentication.mode
+            == mlrun.common.types.AuthenticationMode.IGUAZIO_V4
+        )
+
    def is_explicit_ack_enabled(self) -> bool:
        return self.httpdb.nuclio.explicit_ack == "enabled" and (
            not self.nuclio_version

@@ -1558,7 +1613,6 @@ def read_env(env=None, prefix=env_prefix):
        "https://mlrun-api.", "https://framesd."
    )

-    uisvc = env.get("MLRUN_UI_SERVICE_HOST")
    igz_domain = env.get("IGZ_NAMESPACE_DOMAIN")

    # workaround to try and detect IGZ domain

@@ -1584,10 +1638,6 @@ def read_env(env=None, prefix=env_prefix):
    if config.get("nuclio_dashboard_url") == "disabled":
        config["nuclio_dashboard_url"] = ""

-    if uisvc and not config.get("ui_url"):
-        if igz_domain:
-            config["ui_url"] = f"https://mlrun-ui.{igz_domain}"
-
    if log_level := config.get("log_level"):
        import mlrun.utils.logger
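The notable additions above are the `iguazio-v4` authentication mode, the service-account token defaults, and the two `is_iguazio*_mode` helpers. A minimal usage sketch, assuming MLRun's usual environment-variable mapping (the `MLRUN_` prefix with double underscores mirroring nesting in the config tree):

import mlrun

# Deployments typically select the mode via environment, e.g.
# MLRUN_HTTPDB__AUTHENTICATION__MODE=iguazio-v4
print(mlrun.mlconf.httpdb.authentication.mode)  # "none" by default

# The new helpers compare the configured mode against the AuthenticationMode enum
if mlrun.mlconf.is_iguazio_v4_mode():
    # per the defaults above, v4 refreshes access tokens via authentication_endpoint
    ...
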
mlrun/data_types/infer.py
CHANGED

@@ -134,9 +134,9 @@ def get_df_stats(df, options, num_bins=None, sample_size=None):
     for col, values in df.describe(include="all", **kwargs).items():
         stats_dict = {}
         for stat, val in values.dropna().items():
-            if isinstance(val,
+            if isinstance(val, float | np.floating | np.float64):
                 stats_dict[stat] = float(val)
-            elif isinstance(val,
+            elif isinstance(val, int | np.integer | np.int64):
                 # boolean values are considered subclass of int
                 if isinstance(val, bool):
                     stats_dict[stat] = bool(val)
mlrun/datastore/__init__.py
CHANGED

@@ -39,10 +39,11 @@ __all__ = [
 from urllib.parse import urlparse

 import fsspec
+import storey

 import mlrun.datastore.wasbfs
 from mlrun.datastore.datastore_profile import (
-
+    DatastoreProfileKafkaStream,
     DatastoreProfileKafkaTarget,
     DatastoreProfileV3io,
 )

@@ -58,7 +59,6 @@ from ..utils import logger
 from .base import DataItem
 from .datastore import StoreManager, in_memory_store, uri_to_ipython
 from .dbfs_store import DatabricksFileBugFixed, DatabricksFileSystemDisableCache
-from .s3 import parse_s3_bucket_and_key
 from .sources import (
     BigQuerySource,
     CSVSource,

@@ -74,7 +74,7 @@ from .store_resources import (
     parse_store_uri,
 )
 from .targets import CSVTarget, NoSqlTarget, ParquetTarget, StreamTarget
-from .utils import get_kafka_brokers_from_dict, parse_kafka_url
+from .utils import get_kafka_brokers_from_dict, parse_kafka_url, parse_s3_bucket_and_key

 store_manager = StoreManager()

@@ -122,7 +122,7 @@ def get_stream_pusher(stream_path: str, **kwargs):
     )
     if isinstance(
         datastore_profile,
-
+        DatastoreProfileKafkaStream | DatastoreProfileKafkaTarget,
     ):
         attributes = datastore_profile.attributes()
         brokers = attributes.pop("brokers", None)

@@ -168,11 +168,12 @@ def get_stream_pusher(stream_path: str, **kwargs):
     raise ValueError(f"unsupported stream path {stream_path}")


-class _DummyStream:
+class _DummyStream(storey.MapClass):
     """stream emulator for tests and debug"""

     def __init__(self, event_list=None, **kwargs):
         self.event_list = event_list or []
+        super().__init__(**kwargs)

     def push(self, data, **kwargs):
         if not isinstance(data, list):

@@ -180,3 +181,9 @@ class _DummyStream:
         for item in data:
             logger.info(f"dummy stream got event: {item}, kwargs={kwargs}")
             self.event_list.append(item)
+
+    def do(self, event):
+        if not isinstance(event, list):
+            event = [event]
+        for item in event:
+            self.event_list.append(item)
mlrun/datastore/azure_blob.py
CHANGED

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import contextlib
 import time
 from pathlib import Path
 from typing import Optional

@@ -30,6 +31,40 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer


 class AzureBlobStore(DataStore):
+    """
+    Azure Blob Storage datastore implementation.
+
+    Supports multiple URL schemas: az://, wasbs://, wasb://
+
+    Supported Connection String Formats:
+    ====================================
+
+    1. Account Key (Standard):
+       "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net"
+
+    2. SAS Token:
+       "BlobEndpoint=https://<account>.blob.core.windows.net/;SharedAccessSignature=<sas_token>"
+
+    3. Minimal BlobEndpoint:
+       "BlobEndpoint=https://<account>.blob.core.windows.net/;AccountName=<account>;AccountKey=<key>"
+
+    4. Custom Domain:
+       "BlobEndpoint=https://<account>.mydomain.com/;AccountName=<account>;AccountKey=<key>"
+
+    5. China/Government Cloud:
+       "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.chinacloudapi.cn"
+
+    6. Full Service Endpoints with SAS:
+       "BlobEndpoint=https://<account>.blob.core.windows.net/;QueueEndpoint=...;SharedAccessSignature=<sas>"
+
+    Authentication Methods:
+    ======================
+    - Account Key (connection_string or storage_options)
+    - SAS Token (connection_string or storage_options)
+    - OAuth/Azure AD (storage_options: client_id, client_secret, tenant_id)
+
+    """
+
     using_bucket = True
     max_concurrency = 100
     max_blocksize = 1024 * 1024 * 4

@@ -40,6 +75,12 @@ class AzureBlobStore(DataStore):
     def __init__(
         self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
     ):
+        # Extract container from WASBS endpoint before calling super()
+        self._container_from_endpoint = None
+        if schema in ["wasbs", "wasb"] and endpoint and "@" in endpoint:
+            # Handle container@host format
+            self._container_from_endpoint, endpoint = endpoint.split("@", 1)
+
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._service_client = None
         self._storage_options = None

@@ -67,6 +108,34 @@ class AzureBlobStore(DataStore):
             or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
             credential=self._get_secret_or_env("credential"),
         )
+        # Use container extracted from WASBS endpoint during initialization
+        if self._container_from_endpoint:
+            res["container"] = self._container_from_endpoint
+
+        # For az:// URLs, endpoint contains the container name
+        if not res.get("container") and self.kind in ["az"]:
+            if container := getattr(self, "endpoint", None):
+                res["container"] = container
+
+        # Last resort: For wasbs:// without container, check if connection string has BlobEndpoint with container
+        if not res.get("container") and self.kind in ["wasbs", "wasb"]:
+            connection_string = res.get("connection_string")
+            if connection_string and "BlobEndpoint=" in connection_string:
+                # Try to extract container from BlobEndpoint URL
+                for part in connection_string.split(";"):
+                    if part.startswith("BlobEndpoint="):
+                        blob_endpoint = part.split("=", 1)[1]
+                        # Parse URL to get path component
+                        from urllib.parse import urlparse

+                        parsed = urlparse(blob_endpoint)
+                        if parsed.path and parsed.path.strip("/"):
+                            # Extract first path segment as container
+                            path_parts = parsed.path.strip("/").split("/")
+                            if path_parts[0]:
+                                res["container"] = path_parts[0]
+                        break
+
         self._storage_options = self._sanitize_options(res)
         return self._storage_options

@@ -165,7 +234,18 @@ class AzureBlobStore(DataStore):
         # if called without passing dataitem - like in fset.purge_targets,
         # key will include schema.
         if not schema:
-
+            # For wasbs/wasb, the filesystem is scoped to the container, so we need to use
+            # the container name as the base path, not the hostname endpoint.
+            # For az://, endpoint already contains the container name.
+            if self.kind in ["wasbs", "wasb"]:
+                container = self.storage_options.get("container")
+                if container:
+                    key = Path(container, key).as_posix()
+                else:
+                    # If no container found, use endpoint (might be hostname, but better than nothing)
+                    key = Path(self.endpoint, key).as_posix()
+            else:
+                key = Path(self.endpoint, key).as_posix()
         return key

     def upload(self, key, src_path):

@@ -229,18 +309,27 @@ class AzureBlobStore(DataStore):
         st = self.storage_options
         service = "blob"
         primary_url = None
-
+
+        # Parse connection string (fills account_name/account_key or SAS)
+        connection_string = st.get("connection_string")
+        if connection_string:
             primary_url, _, parsed_credential = parse_connection_str(
-
+                connection_string, credential=None, service=service
             )
-
-
-
-
+
+            if isinstance(parsed_credential, str):
+                # SharedAccessSignature as raw string
+                parsed_credential = {"sas_token": parsed_credential}
+
+            for key in ["account_name", "account_key", "sas_token"]:
+                if parsed_value := parsed_credential.get(key):
+                    # Only check for conflicts if storage options has a non-empty value for this key
+                    existing_value = st.get(key)
+                    if existing_value and existing_value != parsed_value:
                         if key == "account_name":
                             raise mlrun.errors.MLRunInvalidArgumentError(
-                                f"Storage option for '{key}' is '{
-
+                                f"Storage option for '{key}' is '{existing_value}', "
+                                f"which does not match corresponding connection string '{parsed_value}'"
                             )
                         else:
                             raise mlrun.errors.MLRunInvalidArgumentError(

@@ -249,57 +338,83 @@ class AzureBlobStore(DataStore):
                     st[key] = parsed_value

         account_name = st.get("account_name")
+        # Derive host (prefer connection string primary URL)
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
-
+            # Remove any path components from the host
+            host = primary_url.split("/")[0]
         elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
         else:
+            # nothing to configure yet
             return res

-
+        host = host.rstrip("/")
+
+        # Account key (optional; WASB supports it)
+        if "account_key" in st and st["account_key"]:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]

-
-
-
-                "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
-            )
-        if "client_id" in st:
-            res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
-                "client_id"
-            ]
-        if "client_secret" in st:
-            res[f"spark.hadoop.fs.azure.account.oauth2.client.secret.{host}"] = st[
-                "client_secret"
-            ]
-        if "tenant_id" in st:
-            tenant_id = st["tenant_id"]
-            res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
-                f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
-            )
+        # --- WASB + SAS (container-scoped key; no provider classes needed) ---
+        if "sas_token" in st and st["sas_token"]:
+            sas = st["sas_token"].lstrip("?")

-
-
-
-
-
-
+            container = st.get("container")
+
+            if container:
+                # fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
+                res[f"spark.hadoop.fs.azure.sas.{container}.{host}"] = sas
+
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Container name is required for WASB SAS. "
+                    "Set self.endpoint or storage_options['container']."
+                )
         return res

     @property
     def spark_url(self):
-
-
-
-
-
-
-
-
-
-
-
+        # Build: wasbs://<container>@<host>
+        st = self.storage_options
+        service = "blob"
+
+        container = st.get("container")
+
+        if not container:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Container name is required to build the WASB URL. "
+                "Set storage_options['container'] or use datastore profile with container specified."
+            )
+
+        # Prefer host from connection string; else synthesize from account_name
+        host = None
+        account_name = st.get("account_name")
+        connection_string = st.get("connection_string")
+
+        if connection_string:
+            with contextlib.suppress(Exception):
+                primary_url, _, _ = parse_connection_str(
+                    connection_string, credential=None, service=service
+                )
+                if primary_url.startswith("http://"):
+                    primary_url = primary_url[len("http://") :]
+                if primary_url.startswith("https://"):
+                    primary_url = primary_url[len("https://") :]
+                # Remove any path components from the host
+                host = primary_url.split("/")[0].rstrip("/")
+        if not host and account_name:
+            host = f"{account_name}.{service}.core.windows.net"
+
+        # For wasbs:// URLs where endpoint is already the host
+        if not host and self.kind in ["wasbs", "wasb"] and hasattr(self, "endpoint"):
+            host = getattr(self, "endpoint", None)
+
+        if not host:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "account_name is required (or provide a connection_string) to build the WASB URL."
+            )
+
+        return f"wasbs://{container}@{host}"