databricks-sdk 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/sdk/__init__.py +273 -239
- databricks/sdk/_base_client.py +36 -24
- databricks/sdk/config.py +5 -0
- databricks/sdk/credentials_provider.py +35 -20
- databricks/sdk/data_plane.py +1 -1
- databricks/sdk/mixins/files.py +184 -1
- databricks/sdk/mixins/open_ai_client.py +55 -1
- databricks/sdk/retries.py +5 -1
- databricks/sdk/service/apps.py +12 -4
- databricks/sdk/service/billing.py +348 -0
- databricks/sdk/service/catalog.py +16 -62
- databricks/sdk/service/cleanrooms.py +73 -2
- databricks/sdk/service/compute.py +40 -0
- databricks/sdk/service/dashboards.py +12 -4
- databricks/sdk/service/files.py +6 -3
- databricks/sdk/service/iam.py +158 -0
- databricks/sdk/service/jobs.py +253 -17
- databricks/sdk/service/oauth2.py +94 -50
- databricks/sdk/service/pipelines.py +89 -12
- databricks/sdk/service/serving.py +424 -222
- databricks/sdk/service/settings.py +206 -0
- databricks/sdk/service/sharing.py +51 -54
- databricks/sdk/useragent.py +54 -0
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/METADATA +26 -26
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/RECORD +30 -30
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/WHEEL +1 -1
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/LICENSE +0 -0
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/NOTICE +0 -0
- {databricks_sdk-0.40.0.dist-info → databricks_sdk-0.42.0.dist-info}/top_level.txt +0 -0
databricks/sdk/_base_client.py
CHANGED
@@ -1,6 +1,7 @@
 import io
 import logging
 import urllib.parse
+from abc import ABC, abstractmethod
 from datetime import timedelta
 from types import TracebackType
 from typing import (Any, BinaryIO, Callable, Dict, Iterable, Iterator, List,
@@ -158,16 +159,29 @@ class _BaseClient:
         if isinstance(data, (str, bytes)):
             data = io.BytesIO(data.encode('utf-8') if isinstance(data, str) else data)
 
-        …
-        # re-read already read data from the body.
-        if data is not None and not self._is_seekable_stream(data):
-            logger.debug(f"Retry disabled for non-seekable stream: type={type(data)}")
-            call = self._perform
-        else:
+        if not data:
+            # The request is not a stream.
             call = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
                            is_retryable=self._is_retryable,
                            clock=self._clock)(self._perform)
+        elif self._is_seekable_stream(data):
+            # Keep track of the initial position of the stream so that we can rewind to it
+            # if we need to retry the request.
+            initial_data_position = data.tell()
+
+            def rewind():
+                logger.debug(f"Rewinding input data to offset {initial_data_position} before retry")
+                data.seek(initial_data_position)
+
+            call = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
+                           is_retryable=self._is_retryable,
+                           clock=self._clock,
+                           before_retry=rewind)(self._perform)
+        else:
+            # Do not retry if the stream is not seekable. This is necessary to avoid bugs
+            # where the retry doesn't re-read already read data from the stream.
+            logger.debug(f"Retry disabled for non-seekable stream: type={type(data)}")
+            call = self._perform
 
         response = call(method,
                         url,
@@ -248,12 +262,6 @@ class _BaseClient:
                  files=None,
                  data=None,
                  auth: Callable[[requests.PreparedRequest], requests.PreparedRequest] = None):
-        # Keep track of the initial position of the stream so that we can rewind it if
-        # we need to retry the request.
-        initial_data_position = 0
-        if self._is_seekable_stream(data):
-            initial_data_position = data.tell()
-
         response = self._session.request(method,
                                          url,
                                          params=self._fix_query_string(query),
@@ -265,16 +273,8 @@ class _BaseClient:
                                          stream=raw,
                                          timeout=self._http_timeout_seconds)
         self._record_request_log(response, raw=raw or data is not None or files is not None)
-
         error = self._error_parser.get_api_error(response)
         if error is not None:
-            # If the request body is a seekable stream, rewind it so that it is ready
-            # to be read again in case of a retry.
-            #
-            # TODO: This should be moved into a "before-retry" hook to avoid one
-            # unnecessary seek on the last failed retry before aborting.
-            if self._is_seekable_stream(data):
-                data.seek(initial_data_position)
             raise error from None
 
         return response
@@ -285,8 +285,20 @@ class _BaseClient:
         logger.debug(RoundTrip(response, self._debug_headers, self._debug_truncate_bytes, raw).generate())
 
 
+class _RawResponse(ABC):
+
+    @abstractmethod
+    # follows Response signature: https://github.com/psf/requests/blob/main/src/requests/models.py#L799
+    def iter_content(self, chunk_size: int = 1, decode_unicode: bool = False):
+        pass
+
+    @abstractmethod
+    def close(self):
+        pass
+
+
 class _StreamingResponse(BinaryIO):
-    _response:
+    _response: _RawResponse
     _buffer: bytes
     _content: Union[Iterator[bytes], None]
     _chunk_size: Union[int, None]
@@ -298,7 +310,7 @@ class _StreamingResponse(BinaryIO):
     def flush(self) -> int:
        pass
 
-    def __init__(self, response:
+    def __init__(self, response: _RawResponse, chunk_size: Union[int, None] = None):
         self._response = response
         self._buffer = b''
         self._content = None
@@ -308,7 +320,7 @@ class _StreamingResponse(BinaryIO):
         if self._closed:
             raise ValueError("I/O operation on closed file")
         if not self._content:
-            self._content = self._response.iter_content(chunk_size=self._chunk_size)
+            self._content = self._response.iter_content(chunk_size=self._chunk_size, decode_unicode=False)
 
     def __enter__(self) -> BinaryIO:
         self._open()
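
To see the new dispatch end to end, here is a minimal, self-contained sketch of the same three-way decision (no body, seekable body, non-seekable body). The helper names and the simplified retry loop are hypothetical stand-ins; the SDK itself delegates to retried(..., before_retry=rewind) from databricks.sdk.retries as shown above:

    from typing import BinaryIO, Callable, Optional

    def dispatch(perform: Callable, data: Optional[BinaryIO]) -> Callable:
        # Hypothetical stand-in for the dispatch in _BaseClient.do().
        if not data:
            return with_retries(perform, before_retry=None)  # no body: always retryable
        if data.seekable():
            start = data.tell()  # remember where the body begins

            def rewind():
                data.seek(start)  # next attempt re-reads the body from the start

            return with_retries(perform, before_retry=rewind)
        # Non-seekable stream: a retry would re-send a half-consumed body,
        # so fall back to a single attempt.
        return perform

    def with_retries(perform: Callable, before_retry: Optional[Callable]) -> Callable:
        # Simplified loop; the SDK uses databricks.sdk.retries.retried instead.
        def wrapper(*args, **kwargs):
            for attempt in range(3):
                try:
                    return perform(*args, **kwargs)
                except ConnectionError:
                    if attempt == 2:
                        raise  # out of attempts
                    if before_retry:
                        before_retry()
        return wrapper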
databricks/sdk/config.py
CHANGED
@@ -92,6 +92,11 @@ class Config:
     max_connections_per_pool: int = ConfigAttribute()
     databricks_environment: Optional[DatabricksEnvironment] = None
 
+    enable_experimental_files_api_client: bool = ConfigAttribute(
+        env='DATABRICKS_ENABLE_EXPERIMENTAL_FILES_API_CLIENT')
+    files_api_client_download_max_total_recovers = None
+    files_api_client_download_max_total_recovers_without_progressing = 1
+
     def __init__(
         self,
         *,
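
The new flag gates the experimental Files API client described under databricks/sdk/mixins/files.py below. A minimal sketch of enabling it in code (host and token are placeholders; exporting the environment variable named above works equally):

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.core import Config

    # Equivalent to exporting DATABRICKS_ENABLE_EXPERIMENTAL_FILES_API_CLIENT=true.
    cfg = Config(host='https://example.cloud.databricks.com',  # placeholder
                 token='dapi...',                              # placeholder
                 enable_experimental_files_api_client=True)
    w = WorkspaceClient(config=cfg)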
databricks/sdk/credentials_provider.py
CHANGED

@@ -167,6 +167,7 @@ def oauth_service_principal(cfg: 'Config') -> Optional[CredentialsProvider]:
     oidc = cfg.oidc_endpoints
     if oidc is None:
         return None
+
     token_source = ClientCredentials(client_id=cfg.client_id,
                                      client_secret=cfg.client_secret,
                                      token_url=oidc.token_endpoint,
@@ -187,6 +188,7 @@ def oauth_service_principal(cfg: 'Config') -> Optional[CredentialsProvider]:
 def external_browser(cfg: 'Config') -> Optional[CredentialsProvider]:
     if cfg.auth_type != 'external-browser':
         return None
+
     client_id, client_secret = None, None
     if cfg.client_id:
         client_id = cfg.client_id
@@ -194,12 +196,11 @@ def external_browser(cfg: 'Config') -> Optional[CredentialsProvider]:
     elif cfg.azure_client_id:
         client_id = cfg.azure_client_id
         client_secret = cfg.azure_client_secret
-
     if not client_id:
         client_id = 'databricks-cli'
 
-    # Load cached credentials from disk if they exist.
-    #
+    # Load cached credentials from disk if they exist. Note that these are
+    # local to the Python SDK and not reused by other SDKs.
     oidc_endpoints = cfg.oidc_endpoints
     redirect_url = 'http://localhost:8020'
     token_cache = TokenCache(host=cfg.host,
@@ -209,17 +210,25 @@ def external_browser(cfg: 'Config') -> Optional[CredentialsProvider]:
                              redirect_url=redirect_url)
     credentials = token_cache.load()
     if credentials:
-        …
+        try:
+            # Pro-actively refresh the loaded credentials. This is done
+            # to detect if the token is expired and needs to be refreshed
+            # by going through the OAuth login flow.
+            credentials.token()
+            return credentials(cfg)
+        # TODO: We should ideally use more specific exceptions.
+        except Exception as e:
+            logger.warning(f'Failed to refresh cached token: {e}. Initiating new OAuth login flow')
+
+    oauth_client = OAuthClient(oidc_endpoints=oidc_endpoints,
+                               client_id=client_id,
+                               redirect_url=redirect_url,
+                               client_secret=client_secret)
+    consent = oauth_client.initiate_consent()
+    if not consent:
+        return None
+
+    credentials = consent.launch_external_browser()
     token_cache.save(credentials)
     return credentials(cfg)
@@ -667,12 +676,18 @@ class MetadataServiceTokenSource(Refreshable):
         self.host = cfg.host
 
     def refresh(self) -> Token:
-        resp = requests.get(
-            …
+        resp = requests.get(
+            self.url,
+            timeout=self._metadata_service_timeout,
+            headers={
+                self.METADATA_SERVICE_VERSION_HEADER: self.METADATA_SERVICE_VERSION,
+                self.METADATA_SERVICE_HOST_HEADER: self.host
+            },
+            proxies={
+                # Explicitly exclude localhost from being proxied. This is necessary
+                # for Metadata URLs which typically point to localhost.
+                "no_proxy": "localhost,127.0.0.1"
+            })
         json_resp: dict[str, Union[str, float]] = resp.json()
         access_token = json_resp.get("access_token", None)
         if access_token is None:
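
In practice the change means a cached external-browser login is validated up front instead of failing on first use. A usage sketch, with a placeholder host:

    from databricks.sdk import WorkspaceClient

    # The first call opens a browser for OAuth consent; later runs reuse the
    # on-disk token cache (local to the Python SDK). The SDK now refreshes a
    # cached token eagerly and falls back to a fresh login flow only if that
    # refresh fails.
    w = WorkspaceClient(host='https://example.cloud.databricks.com',  # placeholder
                        auth_type='external-browser')
    print(w.current_user.me().user_name)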
databricks/sdk/data_plane.py
CHANGED
@@ -3,7 +3,6 @@ from dataclasses import dataclass
 from typing import Callable, List
 
 from databricks.sdk.oauth import Token
-from databricks.sdk.service.oauth2 import DataPlaneInfo
 
 
 @dataclass
@@ -19,6 +18,7 @@ class DataPlaneDetails:
 
 class DataPlaneService:
     """Helper class to fetch and manage DataPlane details."""
+    from .service.serving import DataPlaneInfo
 
     def __init__(self):
         self._data_plane_info = {}
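
Downstream code that imported DataPlaneInfo from its old location must switch to the serving module:

    # 0.42.0: DataPlaneInfo now lives in the serving service.
    from databricks.sdk.service.serving import DataPlaneInfo

    # 0.40.0 (removed): from databricks.sdk.service.oauth2 import DataPlaneInfo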
databricks/sdk/mixins/files.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import base64
+import logging
 import os
 import pathlib
 import platform
@@ -8,19 +9,27 @@ import shutil
 import sys
 from abc import ABC, abstractmethod
 from collections import deque
+from collections.abc import Iterator
 from io import BytesIO
 from types import TracebackType
 from typing import (TYPE_CHECKING, AnyStr, BinaryIO, Generator, Iterable,
-…
+                    Optional, Type, Union)
 from urllib import parse
 
+from requests import RequestException
+
+from .._base_client import _RawResponse, _StreamingResponse
 from .._property import _cached_property
 from ..errors import NotFound
 from ..service import files
+from ..service._internal import _escape_multi_segment_path_parameter
+from ..service.files import DownloadResponse
 
 if TYPE_CHECKING:
     from _typeshed import Self
 
+_LOG = logging.getLogger(__name__)
+
 
 class _DbfsIO(BinaryIO):
     MAX_CHUNK_SIZE = 1024 * 1024
@@ -636,3 +645,177 @@ class DbfsExt(files.DbfsAPI):
         if p.is_dir and not recursive:
             raise IOError('deleting directories requires recursive flag')
         p.delete(recursive=recursive)
+
+
+class FilesExt(files.FilesAPI):
+    __doc__ = files.FilesAPI.__doc__
+
+    def __init__(self, api_client, config: Config):
+        super().__init__(api_client)
+        self._config = config.copy()
+
+    def download(self, file_path: str) -> DownloadResponse:
+        """Download a file.
+
+        Downloads a file of any size. The file contents are the response body.
+        This is a standard HTTP file download, not a JSON RPC.
+
+        It is strongly recommended, for fault tolerance reasons,
+        to iteratively consume from the stream with a maximum read(size)
+        defined instead of using indefinite-size reads.
+
+        :param file_path: str
+            The remote path of the file, e.g. /Volumes/path/to/your/file
+
+        :returns: :class:`DownloadResponse`
+        """
+
+        initial_response: DownloadResponse = self._download_raw_stream(file_path=file_path,
+                                                                       start_byte_offset=0,
+                                                                       if_unmodified_since_timestamp=None)
+
+        wrapped_response = self._wrap_stream(file_path, initial_response)
+        initial_response.contents._response = wrapped_response
+        return initial_response
+
+    def _download_raw_stream(self,
+                             file_path: str,
+                             start_byte_offset: int,
+                             if_unmodified_since_timestamp: Optional[str] = None) -> DownloadResponse:
+        headers = {'Accept': 'application/octet-stream', }
+
+        if start_byte_offset and not if_unmodified_since_timestamp:
+            raise Exception("if_unmodified_since_timestamp is required if start_byte_offset is specified")
+
+        if start_byte_offset:
+            headers['Range'] = f'bytes={start_byte_offset}-'
+
+        if if_unmodified_since_timestamp:
+            headers['If-Unmodified-Since'] = if_unmodified_since_timestamp
+
+        response_headers = ['content-length', 'content-type', 'last-modified', ]
+        res = self._api.do('GET',
+                           f'/api/2.0/fs/files{_escape_multi_segment_path_parameter(file_path)}',
+                           headers=headers,
+                           response_headers=response_headers,
+                           raw=True)
+
+        result = DownloadResponse.from_dict(res)
+        if not isinstance(result.contents, _StreamingResponse):
+            raise Exception("Internal error: response contents is of unexpected type: " +
+                            type(result.contents).__name__)
+
+        return result
+
+    def _wrap_stream(self, file_path: str, downloadResponse: DownloadResponse):
+        underlying_response = _ResilientIterator._extract_raw_response(downloadResponse)
+        return _ResilientResponse(self,
+                                  file_path,
+                                  downloadResponse.last_modified,
+                                  offset=0,
+                                  underlying_response=underlying_response)
+
+
+class _ResilientResponse(_RawResponse):
+
+    def __init__(self, api: FilesExt, file_path: str, file_last_modified: str, offset: int,
+                 underlying_response: _RawResponse):
+        self.api = api
+        self.file_path = file_path
+        self.underlying_response = underlying_response
+        self.offset = offset
+        self.file_last_modified = file_last_modified
+
+    def iter_content(self, chunk_size=1, decode_unicode=False):
+        if decode_unicode:
+            raise ValueError('Decode unicode is not supported')
+
+        iterator = self.underlying_response.iter_content(chunk_size=chunk_size, decode_unicode=False)
+        self.iterator = _ResilientIterator(iterator, self.file_path, self.file_last_modified, self.offset,
+                                           self.api, chunk_size)
+        return self.iterator
+
+    def close(self):
+        self.iterator.close()
+
+
+class _ResilientIterator(Iterator):
+    # This class tracks current offset (returned to the client code)
+    # and recovers from failures by requesting download from the current offset.
+
+    @staticmethod
+    def _extract_raw_response(download_response: DownloadResponse) -> _RawResponse:
+        streaming_response: _StreamingResponse = download_response.contents  # this is an instance of _StreamingResponse
+        return streaming_response._response
+
+    def __init__(self, underlying_iterator, file_path: str, file_last_modified: str, offset: int,
+                 api: FilesExt, chunk_size: int):
+        self._underlying_iterator = underlying_iterator
+        self._api = api
+        self._file_path = file_path
+
+        # Absolute current offset (0-based), i.e. number of bytes from the beginning of the file
+        # that were so far returned to the caller code.
+        self._offset = offset
+        self._file_last_modified = file_last_modified
+        self._chunk_size = chunk_size
+
+        self._total_recovers_count: int = 0
+        self._recovers_without_progressing_count: int = 0
+        self._closed: bool = False
+
+    def _should_recover(self) -> bool:
+        if self._total_recovers_count == self._api._config.files_api_client_download_max_total_recovers:
+            _LOG.debug("Total recovers limit exceeded")
+            return False
+        if self._api._config.files_api_client_download_max_total_recovers_without_progressing is not None and self._recovers_without_progressing_count >= self._api._config.files_api_client_download_max_total_recovers_without_progressing:
+            _LOG.debug("No progression recovers limit exceeded")
+            return False
+        return True
+
+    def _recover(self) -> bool:
+        if not self._should_recover():
+            return False  # recover suppressed, rethrow original exception
+
+        self._total_recovers_count += 1
+        self._recovers_without_progressing_count += 1
+
+        try:
+            self._underlying_iterator.close()
+
+            _LOG.debug("Trying to recover from offset " + str(self._offset))
+
+            # following call includes all the required network retries
+            downloadResponse = self._api._download_raw_stream(self._file_path, self._offset,
+                                                              self._file_last_modified)
+            underlying_response = _ResilientIterator._extract_raw_response(downloadResponse)
+            self._underlying_iterator = underlying_response.iter_content(chunk_size=self._chunk_size,
                                                                         decode_unicode=False)
+            _LOG.debug("Recover succeeded")
+            return True
+        except:
+            return False  # recover failed, rethrow original exception
+
+    def __next__(self):
+        if self._closed:
+            # following _BaseClient
+            raise ValueError("I/O operation on closed file")
+
+        while True:
+            try:
+                returned_bytes = next(self._underlying_iterator)
+                self._offset += len(returned_bytes)
+                self._recovers_without_progressing_count = 0
+                return returned_bytes
+
+            except StopIteration:
+                raise
+
+            # https://requests.readthedocs.io/en/latest/user/quickstart/#errors-and-exceptions
+            except RequestException:
+                if not self._recover():
+                    raise
+
+    def close(self):
+        self._underlying_iterator.close()
+        self._closed = True
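
A sketch of consuming a download through the resilient stream. It assumes a workspace client configured with enable_experimental_files_api_client=True (so that w.files resolves to FilesExt) and a hypothetical volume path; bounded read(size) calls follow the advice in the download docstring, letting the iterator recover from a dropped connection at the current offset:

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()  # assumes enable_experimental_files_api_client=True

    resp = w.files.download('/Volumes/main/default/my_volume/data.bin')  # hypothetical path
    with resp.contents as stream:
        while True:
            chunk = stream.read(1024 * 1024)  # bounded reads, per the docstring advice
            if not chunk:
                break
            # ... process chunk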
databricks/sdk/mixins/open_ai_client.py
CHANGED

@@ -1,4 +1,10 @@
-…
+import json as js
+from typing import Dict, Optional
+
+from requests import Response
+
+from databricks.sdk.service.serving import (ExternalFunctionRequestHttpMethod,
+                                            ServingEndpointsAPI)
 
 
 class ServingEndpointsExt(ServingEndpointsAPI):
@@ -50,3 +56,51 @@ class ServingEndpointsExt(ServingEndpointsAPI):
             openai_api_base=self._api._cfg.host + "/serving-endpoints",
             api_key="no-token",  # Passing in a placeholder to pass validations, this will not be used
             http_client=self._get_authorized_http_client())
+
+    def http_request(self,
+                     conn: str,
+                     method: ExternalFunctionRequestHttpMethod,
+                     path: str,
+                     *,
+                     headers: Optional[Dict[str, str]] = None,
+                     json: Optional[Dict[str, str]] = None,
+                     params: Optional[Dict[str, str]] = None) -> Response:
+        """Make external services call using the credentials stored in UC Connection.
+        **NOTE:** Experimental: This API may change or be removed in a future release without warning.
+        :param conn: str
+            The connection name to use. This is required to identify the external connection.
+        :param method: :class:`ExternalFunctionRequestHttpMethod`
+            The HTTP method to use (e.g., 'GET', 'POST'). This is required.
+        :param path: str
+            The relative path for the API endpoint. This is required.
+        :param headers: Dict[str,str] (optional)
+            Additional headers for the request. If not provided, only auth headers from connections would be
+            passed.
+        :param json: Dict[str,str] (optional)
+            JSON payload for the request.
+        :param params: Dict[str,str] (optional)
+            Query parameters for the request.
+        :returns: :class:`Response`
+        """
+        response = Response()
+        response.status_code = 200
+        server_response = super().http_request(connection_name=conn,
+                                               method=method,
+                                               path=path,
+                                               headers=js.dumps(headers) if headers is not None else None,
+                                               json=js.dumps(json) if json is not None else None,
+                                               params=js.dumps(params) if params is not None else None)
+
+        # Read the content from the HttpRequestResponse object
+        if hasattr(server_response, "contents") and hasattr(server_response.contents, "read"):
+            raw_content = server_response.contents.read()  # Read the bytes
+        else:
+            raise ValueError("Invalid response from the server.")
+
+        # Set the raw content
+        if isinstance(raw_content, bytes):
+            response._content = raw_content
+        else:
+            raise ValueError("Contents must be bytes.")
+
+        return response
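
A usage sketch of the new experimental helper; the connection name and path are hypothetical and must exist as a Unity Catalog connection with stored credentials for the external service:

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.serving import ExternalFunctionRequestHttpMethod

    w = WorkspaceClient()

    resp = w.serving_endpoints.http_request(conn='my_connection',  # hypothetical
                                            method=ExternalFunctionRequestHttpMethod.GET,
                                            path='/api/v1/status',  # hypothetical
                                            params={'verbose': 'true'})
    print(resp.status_code)
    print(resp.text)  # a requests.Response; .json() also works for JSON bodies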
databricks/sdk/retries.py
CHANGED
@@ -13,7 +13,8 @@ def retried(*,
             on: Sequence[Type[BaseException]] = None,
             is_retryable: Callable[[BaseException], Optional[str]] = None,
             timeout=timedelta(minutes=20),
-            clock: Clock = None):
+            clock: Clock = None,
+            before_retry: Callable = None):
     has_allowlist = on is not None
     has_callback = is_retryable is not None
     if not (has_allowlist or has_callback) or (has_allowlist and has_callback):
@@ -54,6 +55,9 @@ def retried(*,
                     raise err
 
                 logger.debug(f'Retrying: {retry_reason} (sleeping ~{sleep}s)')
+                if before_retry:
+                    before_retry()
+
                 clock.sleep(sleep + random())
                 attempt += 1
             raise TimeoutError(f'Timed out after {timeout}') from last_err
databricks/sdk/service/apps.py
CHANGED
@@ -967,25 +967,33 @@ class AppsAPI:
             attempt += 1
         raise TimeoutError(f'timed out after {timeout}: {status_message}')
 
-    def create(self, *, app: Optional[App] = None) -> Wait[App]:
+    def create(self, *, app: Optional[App] = None, no_compute: Optional[bool] = None) -> Wait[App]:
         """Create an app.
 
         Creates a new app.
 
         :param app: :class:`App` (optional)
+        :param no_compute: bool (optional)
+            If true, the app will not be started after creation.
 
         :returns:
           Long-running operation waiter for :class:`App`.
           See :method:wait_get_app_active for more details.
         """
         body = app.as_dict()
+        query = {}
+        if no_compute is not None: query['no_compute'] = no_compute
         headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
 
-        op_response = self._api.do('POST', '/api/2.0/apps', body=body, headers=headers)
+        op_response = self._api.do('POST', '/api/2.0/apps', query=query, body=body, headers=headers)
         return Wait(self.wait_get_app_active, response=App.from_dict(op_response), name=op_response['name'])
 
-    def create_and_wait(self,
-        …
+    def create_and_wait(self,
+                        *,
+                        app: Optional[App] = None,
+                        no_compute: Optional[bool] = None,
+                        timeout=timedelta(minutes=20)) -> App:
+        return self.create(app=app, no_compute=no_compute).result(timeout=timeout)
 
     def delete(self, name: str) -> App:
         """Delete an app.