databricks-sdk 0.67.0__py3-none-any.whl → 0.69.0__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in that registry.
Potentially problematic release.
This version of databricks-sdk might be problematic.
- databricks/sdk/__init__.py +14 -10
- databricks/sdk/_base_client.py +4 -1
- databricks/sdk/common/lro.py +17 -0
- databricks/sdk/common/types/__init__.py +0 -0
- databricks/sdk/common/types/fieldmask.py +39 -0
- databricks/sdk/config.py +62 -14
- databricks/sdk/credentials_provider.py +61 -12
- databricks/sdk/dbutils.py +5 -1
- databricks/sdk/errors/parser.py +8 -3
- databricks/sdk/mixins/files.py +1156 -111
- databricks/sdk/mixins/files_utils.py +293 -0
- databricks/sdk/oidc_token_supplier.py +80 -0
- databricks/sdk/retries.py +102 -2
- databricks/sdk/service/_internal.py +93 -1
- databricks/sdk/service/agentbricks.py +1 -1
- databricks/sdk/service/apps.py +264 -1
- databricks/sdk/service/billing.py +2 -3
- databricks/sdk/service/catalog.py +1026 -540
- databricks/sdk/service/cleanrooms.py +3 -3
- databricks/sdk/service/compute.py +21 -33
- databricks/sdk/service/dashboards.py +7 -3
- databricks/sdk/service/database.py +3 -2
- databricks/sdk/service/dataquality.py +1145 -0
- databricks/sdk/service/files.py +2 -1
- databricks/sdk/service/iam.py +2 -1
- databricks/sdk/service/iamv2.py +1 -1
- databricks/sdk/service/jobs.py +6 -9
- databricks/sdk/service/marketplace.py +3 -1
- databricks/sdk/service/ml.py +3 -1
- databricks/sdk/service/oauth2.py +1 -1
- databricks/sdk/service/pipelines.py +5 -6
- databricks/sdk/service/provisioning.py +544 -655
- databricks/sdk/service/qualitymonitorv2.py +1 -1
- databricks/sdk/service/serving.py +3 -1
- databricks/sdk/service/settings.py +5 -2
- databricks/sdk/service/settingsv2.py +1 -1
- databricks/sdk/service/sharing.py +12 -3
- databricks/sdk/service/sql.py +305 -70
- databricks/sdk/service/tags.py +1 -1
- databricks/sdk/service/vectorsearch.py +3 -1
- databricks/sdk/service/workspace.py +70 -17
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/METADATA +4 -2
- databricks_sdk-0.69.0.dist-info/RECORD +84 -0
- databricks_sdk-0.67.0.dist-info/RECORD +0 -79
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/WHEEL +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/licenses/LICENSE +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/licenses/NOTICE +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/top_level.txt +0 -0
databricks/sdk/__init__.py
CHANGED
@@ -21,6 +21,7 @@ from databricks.sdk.service import cleanrooms as pkg_cleanrooms
 from databricks.sdk.service import compute as pkg_compute
 from databricks.sdk.service import dashboards as pkg_dashboards
 from databricks.sdk.service import database as pkg_database
+from databricks.sdk.service import dataquality as pkg_dataquality
 from databricks.sdk.service import files as pkg_files
 from databricks.sdk.service import iam as pkg_iam
 from databricks.sdk.service import iamv2 as pkg_iamv2
@@ -79,6 +80,7 @@ from databricks.sdk.service.compute import (ClusterPoliciesAPI, ClustersAPI,
 from databricks.sdk.service.dashboards import (GenieAPI, LakeviewAPI,
                                                LakeviewEmbeddedAPI)
 from databricks.sdk.service.database import DatabaseAPI
+from databricks.sdk.service.dataquality import DataQualityAPI
 from databricks.sdk.service.files import DbfsAPI, FilesAPI
 from databricks.sdk.service.iam import (AccessControlAPI,
                                         AccountAccessControlAPI,
@@ -179,11 +181,7 @@ def _make_dbutils(config: client.Config):


 def _make_files_client(apiClient: client.ApiClient, config: client.Config):
-
-        _LOG.info("Experimental Files API client is enabled")
-        return FilesExt(apiClient, config)
-    else:
-        return FilesAPI(apiClient)
+    return FilesExt(apiClient, config)


 class WorkspaceClient:
@@ -282,6 +280,7 @@ class WorkspaceClient:
         self._current_user = pkg_iam.CurrentUserAPI(self._api_client)
         self._dashboard_widgets = pkg_sql.DashboardWidgetsAPI(self._api_client)
         self._dashboards = pkg_sql.DashboardsAPI(self._api_client)
+        self._data_quality = pkg_dataquality.DataQualityAPI(self._api_client)
         self._data_sources = pkg_sql.DataSourcesAPI(self._api_client)
         self._database = pkg_database.DatabaseAPI(self._api_client)
         self._dbfs = DbfsExt(self._api_client)
@@ -540,6 +539,11 @@ class WorkspaceClient:
         """In general, there is little need to modify dashboards using the API."""
         return self._dashboards

+    @property
+    def data_quality(self) -> pkg_dataquality.DataQualityAPI:
+        """Manage the data quality of Unity Catalog objects (currently support `schema` and `table`)."""
+        return self._data_quality
+
     @property
     def data_sources(self) -> pkg_sql.DataSourcesAPI:
         """This API is provided to assist you in making new query objects."""
@@ -595,11 +599,6 @@
         """A feature store is a centralized repository that enables data scientists to find and share features."""
         return self._feature_store

-    @property
-    def files(self) -> pkg_files.FilesAPI:
-        """The Files API is a standard HTTP API that allows you to read, write, list, and delete files and directories by referring to their URI."""
-        return self._files
-
     @property
     def functions(self) -> pkg_catalog.FunctionsAPI:
         """Functions implement User-Defined Functions (UDFs) in Unity Catalog."""
@@ -1005,6 +1004,11 @@
         """User identities recognized by Databricks and represented by email addresses."""
         return self._users

+    @property
+    def files(self) -> FilesExt:
+        """The Files API is a standard HTTP API that allows you to read, write, list, and delete files and directories by referring to their URI."""
+        return self._files
+
     def get_workspace_id(self) -> int:
         """Get the workspace ID of the workspace that this client is connected to."""
         response = self._api_client.do("GET", "/api/2.0/preview/scim/v2/Me", response_headers=["X-Databricks-Org-Id"])
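Taken together, these hunks make the workspace client always construct the extended Files client (the FilesAPI fallback is gone) and add the new data-quality service. A minimal sketch of what callers see, assuming workspace authentication is already configured in the environment:

from databricks.sdk import WorkspaceClient
from databricks.sdk.mixins.files import FilesExt

w = WorkspaceClient()

# files is now always the extended client with multipart upload/download support.
assert isinstance(w.files, FilesExt)

# The new Unity Catalog data-quality service is exposed as a property.
dq = w.data_quality  # databricks.sdk.service.dataquality.DataQualityAPI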
databricks/sdk/_base_client.py
CHANGED
@@ -99,7 +99,10 @@ class _BaseClient:
         # Default to 60 seconds
         self._http_timeout_seconds = http_timeout_seconds or 60

-        self._error_parser = _Parser(
+        self._error_parser = _Parser(
+            extra_error_customizers=extra_error_customizers,
+            debug_headers=debug_headers,
+        )

     def _authenticate(self, r: requests.PreparedRequest) -> requests.PreparedRequest:
         if self._header_factory:
databricks/sdk/common/lro.py
ADDED
@@ -0,0 +1,17 @@
+from datetime import timedelta
+from typing import Optional
+
+
+class LroOptions:
+    """LroOptions is the options for the Long Running Operations.
+    DO NOT USE THIS OPTION. This option is still under development
+    and can be updated in the future without notice.
+    """
+
+    def __init__(self, *, timeout: Optional[timedelta] = None):
+        """
+        Args:
+            timeout: The timeout for the Long Running Operations.
+                If not set, the default timeout is 20 minutes.
+        """
+        self.timeout = timeout or timedelta(minutes=20)
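The class is small enough to show end to end. A usage sketch (the import path simply mirrors the new file location; the docstring above makes clear the option is not yet meant for external use):

from datetime import timedelta

from databricks.sdk.common.lro import LroOptions

# An explicit timeout for a long-running operation.
opts = LroOptions(timeout=timedelta(minutes=5))
print(opts.timeout)          # 0:05:00

# Omitting the timeout falls back to the documented 20-minute default.
print(LroOptions().timeout)  # 0:20:00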
databricks/sdk/common/types/__init__.py
File without changes
databricks/sdk/common/types/fieldmask.py
ADDED
@@ -0,0 +1,39 @@
+class FieldMask(object):
+    """Class for FieldMask message type."""
+
+    # This is based on the base implementation from protobuf.
+    # https://pigweed.googlesource.com/third_party/github/protocolbuffers/protobuf/+/HEAD/python/google/protobuf/internal/field_mask.py
+    # The original implementation only works with proto generated classes.
+    # Since our classes are not generated from proto files, we need to implement it manually.
+
+    def __init__(self, field_mask=None):
+        """Initializes the FieldMask."""
+        if field_mask:
+            self.paths = field_mask
+
+    def ToJsonString(self) -> str:
+        """Converts FieldMask to string."""
+        return ",".join(self.paths)
+
+    def FromJsonString(self, value: str) -> None:
+        """Converts string to FieldMask."""
+        if not isinstance(value, str):
+            raise ValueError("FieldMask JSON value not a string: {!r}".format(value))
+        if value:
+            self.paths = value.split(",")
+        else:
+            self.paths = []
+
+    def __eq__(self, other) -> bool:
+        """Check equality based on paths."""
+        if not isinstance(other, FieldMask):
+            return False
+        return self.paths == other.paths
+
+    def __hash__(self) -> int:
+        """Hash based on paths tuple."""
+        return hash(tuple(self.paths))
+
+    def __repr__(self) -> str:
+        """String representation for debugging."""
+        return f"FieldMask(paths={self.paths})"
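FieldMask mirrors the protobuf helper but only deals in comma-separated path strings, so a short round trip covers both conversion methods (the path names below are made up for illustration):

from databricks.sdk.common.types.fieldmask import FieldMask

# Build a mask from a list of field paths and serialize it for a request.
mask = FieldMask(["display_name", "schedule.cron"])
assert mask.ToJsonString() == "display_name,schedule.cron"

# Parse a mask received as a string; the result compares equal path-for-path.
parsed = FieldMask()
parsed.FromJsonString("display_name,schedule.cron")
assert parsed == mask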
databricks/sdk/config.py
CHANGED
@@ -6,7 +6,7 @@ import os
 import pathlib
 import sys
 import urllib.parse
-from typing import Dict, Iterable, Optional
+from typing import Dict, Iterable, List, Optional

 import requests

@@ -110,18 +110,27 @@ class Config:

     disable_async_token_refresh: bool = ConfigAttribute(env="DATABRICKS_DISABLE_ASYNC_TOKEN_REFRESH")

-
-
-
+    disable_experimental_files_api_client: bool = ConfigAttribute(
+        env="DATABRICKS_DISABLE_EXPERIMENTAL_FILES_API_CLIENT"
+    )
+
+    files_ext_client_download_streaming_chunk_size: int = 2 * 1024 * 1024  # 2 MiB
+
+    # When downloading a file, the maximum number of attempts to retry downloading the whole file. Default is no limit.
+    files_ext_client_download_max_total_recovers: Optional[int] = None

-    #
+    # When downloading a file, the maximum number of attempts to retry downloading from the same offset without progressing.
+    # This is to avoid infinite retrying when the download is not making any progress. Default is 1.
+    files_ext_client_download_max_total_recovers_without_progressing = 1
+
+    # File multipart upload/download parameters
     # ----------------------

     # Minimal input stream size (bytes) to use multipart / resumable uploads.
     # For small files it's more efficient to make one single-shot upload request.
     # When uploading a file, SDK will initially buffer this many bytes from input stream.
     # This parameter can be less or bigger than multipart_upload_chunk_size.
-
+    files_ext_multipart_upload_min_stream_size: int = 50 * 1024 * 1024

     # Maximum number of presigned URLs that can be requested at a time.
     #
@@ -131,23 +140,59 @@
     # the stream back. In case of a non-seekable stream we cannot rewind, so we'll abort
     # the upload. To reduce the chance of this, we're requesting presigned URLs one by one
     # and using them immediately.
-
+    files_ext_multipart_upload_batch_url_count: int = 1

-    # Size of the chunk to use for multipart uploads.
+    # Size of the chunk to use for multipart uploads & downloads.
     #
     # The smaller chunk is, the less chance for network errors (or URL get expired),
     # but the more requests we'll make.
     # For AWS, minimum is 5Mb: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
     # For GCP, minimum is 256 KiB (and also recommended multiple is 256 KiB)
     # boto uses 8Mb: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig
-
-
-    #
-
+    files_ext_multipart_upload_default_part_size: int = 10 * 1024 * 1024  # 10 MiB
+
+    # List of multipart upload part sizes that can be automatically selected
+    files_ext_multipart_upload_part_size_options: List[int] = [
+        10 * 1024 * 1024,  # 10 MiB
+        20 * 1024 * 1024,  # 20 MiB
+        50 * 1024 * 1024,  # 50 MiB
+        100 * 1024 * 1024,  # 100 MiB
+        200 * 1024 * 1024,  # 200 MiB
+        500 * 1024 * 1024,  # 500 MiB
+        1 * 1024 * 1024 * 1024,  # 1 GiB
+        2 * 1024 * 1024 * 1024,  # 2 GiB
+        4 * 1024 * 1024 * 1024,  # 4 GiB
+    ]
+
+    # Maximum size of a single part in multipart upload.
+    # For AWS, maximum is 5 GiB: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
+    # For Azure, maximum is 4 GiB: https://learn.microsoft.com/en-us/rest/api/storageservices/put-block
+    # For CloudFlare R2, maximum is 5 GiB: https://developers.cloudflare.com/r2/objects/multipart-objects/
+    files_ext_multipart_upload_max_part_size: int = 4 * 1024 * 1024 * 1024  # 4 GiB
+
+    # Default parallel multipart upload concurrency. Set to 10 because of the experiment results show that it
+    # gives good performance result.
+    files_ext_multipart_upload_default_parallelism: int = 10
+
+    # The expiration duration for presigned URLs used in multipart uploads and downloads.
+    # The client will request new presigned URLs if the previous one is expired. The duration should be long enough
+    # to complete the upload or download of a single part.
+    files_ext_multipart_upload_url_expiration_duration: datetime.timedelta = datetime.timedelta(hours=1)
+    files_ext_presigned_download_url_expiration_duration: datetime.timedelta = datetime.timedelta(hours=1)
+
+    # When downloading a file in parallel, how many worker threads to use.
+    files_ext_parallel_download_default_parallelism: int = 10
+
+    # When downloading a file, if the file size is smaller than this threshold,
+    # We'll use a single-threaded download even if the parallel download is enabled.
+    files_ext_parallel_download_min_file_size: int = 50 * 1024 * 1024  # 50 MiB
+
+    # Default chunk size to use when downloading a file in parallel. Not effective for single threaded download.
+    files_ext_parallel_download_default_part_size: int = 10 * 1024 * 1024  # 10 MiB

     # This is not a "wall time" cutoff for the whole upload request,
     # but a maximum time between consecutive data reception events (even 1 byte) from the server
-
+    files_ext_network_transfer_inactivity_timeout_seconds: float = 60

     # Cap on the number of custom retries during incremental uploads:
     # 1) multipart: upload part URL is expired, so new upload URLs must be requested to continue upload
@@ -155,7 +200,10 @@ class Config:
     # retrieved to continue the upload.
     # In these two cases standard SDK retries (which are capped by the `retry_timeout_seconds` option) are not used.
     # Note that retry counter is reset when upload is successfully resumed.
-
+    files_ext_multipart_upload_max_retries = 3
+
+    # Cap on the number of custom retries during parallel downloads.
+    files_ext_parallel_download_max_retries = 3

     def __init__(
         self,
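All of these knobs are plain class attributes on Config, so they can be overridden per instance after construction; only disable_experimental_files_api_client is environment-driven. A sketch of tuning a few of them, with illustrative values rather than recommendations, assuming host and credentials are already available in the environment:

from databricks.sdk.core import Config

cfg = Config()  # resolves host and credentials from the environment

# Use 20 MiB upload parts instead of the 10 MiB default.
cfg.files_ext_multipart_upload_default_part_size = 20 * 1024 * 1024

# Cap whole-file download recovers at 5 instead of the unlimited default.
cfg.files_ext_client_download_max_total_recovers = 5

# The extended Files client itself can still be disabled entirely via
#   export DATABRICKS_DISABLE_EXPERIMENTAL_FILES_API_CLIENT=true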
databricks/sdk/credentials_provider.py
CHANGED
@@ -12,7 +12,7 @@ import sys
 import threading
 import time
 from datetime import datetime
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 import google.auth  # type: ignore
 import requests
@@ -89,7 +89,6 @@ def credentials_strategy(name: str, require: List[str]):
         @functools.wraps(func)
         def wrapper(cfg: "Config") -> Optional[CredentialsProvider]:
             for attr in require:
-                getattr(cfg, attr)
                 if not getattr(cfg, attr):
                     return None
             return func(cfg)
@@ -103,7 +102,12 @@ def credentials_strategy(name: str, require: List[str]):
 def oauth_credentials_strategy(name: str, require: List[str]):
     """Given the function that receives a Config and returns an OauthHeaderFactory,
     create an OauthCredentialsProvider with a given name and required configuration
-    attribute names to be present for this function to be called.
+    attribute names to be present for this function to be called.
+
+    Args:
+        name: The name of the authentication strategy
+        require: List of config attributes that must be present
+    """

     def inner(
         func: Callable[["Config"], OAuthCredentialsProvider],
@@ -356,33 +360,47 @@ def oidc_credentials_provider(cfg, id_token_source: oidc.IdTokenSource) -> Optional[CredentialsProvider]:
     return OAuthCredentialsProvider(refreshed_headers, token)


-
-
+def _oidc_credentials_provider(
+    cfg: "Config", supplier_factory: Callable[[], Any], provider_name: str
+) -> Optional[CredentialsProvider]:
     """
-
-
+    Generic OIDC credentials provider that works with any OIDC token supplier.
+
+    Args:
+        cfg: Databricks configuration
+        supplier_factory: Callable that returns an OIDC token supplier instance
+        provider_name: Human-readable name (e.g., "GitHub OIDC", "Azure DevOps OIDC")

-
-
+    Returns:
+        OAuthCredentialsProvider if successful, None if supplier unavailable or token retrieval fails
     """
-
+    # Try to create the supplier
+    try:
+        supplier = supplier_factory()
+    except Exception as e:
+        logger.debug(f"{provider_name}: {str(e)}")
+        return None

+    # Determine the audience for token exchange
     audience = cfg.token_audience
     if audience is None and cfg.is_account_client:
         audience = cfg.account_id
     if audience is None and not cfg.is_account_client:
         audience = cfg.oidc_endpoints.token_endpoint

-    # Try to get an
+    # Try to get an OIDC token. If no supplier returns a token, we cannot use this authentication mode.
     id_token = supplier.get_oidc_token(audience)
     if not id_token:
+        logger.debug(f"{provider_name}: no token available, skipping authentication method")
         return None

+    logger.info(f"Configured {provider_name} authentication")
+
     def token_source_for(audience: str) -> oauth.TokenSource:
         id_token = supplier.get_oidc_token(audience)
         if not id_token:
             # Should not happen, since we checked it above.
-            raise Exception("Cannot get
+            raise Exception(f"Cannot get {provider_name} token")

         return oauth.ClientCredentials(
             client_id=cfg.client_id,
@@ -408,6 +426,36 @@ def github_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
     return OAuthCredentialsProvider(refreshed_headers, token)


+@oauth_credentials_strategy("github-oidc", ["host", "client_id"])
+def github_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
+    """
+    GitHub OIDC authentication uses a Token Supplier to get a JWT Token and exchanges
+    it for a Databricks Token.
+
+    Supported in GitHub Actions with OIDC service connections.
+    """
+    return _oidc_credentials_provider(
+        cfg=cfg,
+        supplier_factory=lambda: oidc_token_supplier.GitHubOIDCTokenSupplier(),
+        provider_name="GitHub OIDC",
+    )
+
+
+@oauth_credentials_strategy("azure-devops-oidc", ["host", "client_id"])
+def azure_devops_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
+    """
+    Azure DevOps OIDC authentication uses a Token Supplier to get a JWT Token
+    and exchanges it for a Databricks Token.
+
+    Supported in Azure DevOps pipelines with OIDC service connections.
+    """
+    return _oidc_credentials_provider(
+        cfg=cfg,
+        supplier_factory=lambda: oidc_token_supplier.AzureDevOpsOIDCTokenSupplier(),
+        provider_name="Azure DevOps OIDC",
+    )
+
+
 @oauth_credentials_strategy("github-oidc-azure", ["host", "azure_client_id"])
 def github_oidc_azure(cfg: "Config") -> Optional[CredentialsProvider]:
     if "ACTIONS_ID_TOKEN_REQUEST_TOKEN" not in os.environ:
@@ -1019,6 +1067,7 @@ class DefaultCredentials:
             azure_service_principal,
             github_oidc_azure,
             azure_cli,
+            azure_devops_oidc,
             external_browser,
             databricks_cli,
             runtime_native_auth,
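The new azure-devops-oidc strategy is inserted into the DefaultCredentials chain right after azure_cli, so it is tried automatically, and it can also be selected explicitly via auth_type. A sketch under the assumption that the pipeline exposes the OIDC environment read by AzureDevOpsOIDCTokenSupplier (defined in oidc_token_supplier.py, not shown in this hunk); the host and client_id below are placeholders:

from databricks.sdk import WorkspaceClient

# Explicitly pick the new strategy; the name matches the decorator above.
w = WorkspaceClient(
    host="https://example-workspace.azuredatabricks.net",  # placeholder
    client_id="<databricks-oauth-client-id>",              # placeholder
    auth_type="azure-devops-oidc",
)
print(w.current_user.me().user_name)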
databricks/sdk/dbutils.py
CHANGED
@@ -210,7 +210,11 @@ class _JobsUtil:
 class RemoteDbUtils:

     def __init__(self, config: "Config" = None):
-
+        # Create a shallow copy of the config to allow the use of a custom
+        # user-agent while avoiding modifying the original config.
+        self._config = Config() if not config else config.copy()
+        self._config.with_user_agent_extra("dbutils", "remote")
+
         self._client = ApiClient(self._config)
         self._clusters = compute_ext.ClustersExt(self._client)
         self._commands = compute.CommandExecutionAPI(self._client)
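The copy matters because with_user_agent_extra would otherwise tag the caller's own Config with "dbutils/remote". A sketch that pokes at the private _config attribute purely to illustrate the isolation, assuming credentials are configured in the environment:

from databricks.sdk.core import Config
from databricks.sdk.dbutils import RemoteDbUtils

cfg = Config()
dbu = RemoteDbUtils(cfg)

# The user-agent tag lands on the internal copy, not on the caller's config.
assert "dbutils/remote" in dbu._config.user_agent
assert "dbutils/remote" not in cfg.user_agent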
databricks/sdk/errors/parser.py
CHANGED
@@ -31,12 +31,15 @@ _error_customizers = [
 ]


-def _unknown_error(response: requests.Response) -> str:
+def _unknown_error(response: requests.Response, debug_headers: bool = False) -> str:
     """A standard error message that can be shown when an API response cannot be parsed.

     This error message includes a link to the issue tracker for the SDK for users to report the issue to us.
+
+    :param response: The response object from the API request.
+    :param debug_headers: Whether to include headers in the request log. Defaults to False to defensively handle cases where request headers might contain sensitive data (e.g. tokens).
     """
-    request_log = RoundTrip(response, debug_headers=
+    request_log = RoundTrip(response, debug_headers=debug_headers, debug_truncate_bytes=10 * 1024).generate()
     return (
         "This is likely a bug in the Databricks SDK for Python or the underlying "
         "API. Please report this issue with the following debugging information to the SDK issue tracker at "
@@ -56,11 +59,13 @@ class _Parser:
         self,
         extra_error_parsers: List[_ErrorDeserializer] = [],
         extra_error_customizers: List[_ErrorCustomizer] = [],
+        debug_headers: bool = False,
     ):
         self._error_parsers = _error_deserializers + (extra_error_parsers if extra_error_parsers is not None else [])
         self._error_customizers = _error_customizers + (
             extra_error_customizers if extra_error_customizers is not None else []
         )
+        self._debug_headers = debug_headers

     def get_api_error(self, response: requests.Response) -> Optional[DatabricksError]:
         """
@@ -84,7 +89,7 @@ class _Parser:
             )
             return _error_mapper(
                 response,
-                {"message": "unable to parse response. " + _unknown_error(response)},
+                {"message": "unable to parse response. " + _unknown_error(response, self._debug_headers)},
             )

         # Private link failures happen via a redirect to the login page. From a requests-perspective, the request