databricks-sdk 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of databricks-sdk has been flagged as potentially problematic.

Files changed (33)
  1. databricks/sdk/__init__.py +24 -2
  2. databricks/sdk/_base_client.py +61 -14
  3. databricks/sdk/config.py +10 -9
  4. databricks/sdk/credentials_provider.py +6 -5
  5. databricks/sdk/mixins/jobs.py +49 -0
  6. databricks/sdk/mixins/open_ai_client.py +2 -2
  7. databricks/sdk/service/apps.py +185 -4
  8. databricks/sdk/service/billing.py +248 -1
  9. databricks/sdk/service/catalog.py +1943 -46
  10. databricks/sdk/service/cleanrooms.py +1281 -0
  11. databricks/sdk/service/compute.py +1486 -8
  12. databricks/sdk/service/dashboards.py +336 -11
  13. databricks/sdk/service/files.py +162 -2
  14. databricks/sdk/service/iam.py +353 -2
  15. databricks/sdk/service/jobs.py +1281 -16
  16. databricks/sdk/service/marketplace.py +688 -0
  17. databricks/sdk/service/ml.py +1038 -2
  18. databricks/sdk/service/oauth2.py +176 -0
  19. databricks/sdk/service/pipelines.py +602 -15
  20. databricks/sdk/service/provisioning.py +402 -0
  21. databricks/sdk/service/serving.py +615 -0
  22. databricks/sdk/service/settings.py +1190 -3
  23. databricks/sdk/service/sharing.py +328 -2
  24. databricks/sdk/service/sql.py +1186 -2
  25. databricks/sdk/service/vectorsearch.py +290 -0
  26. databricks/sdk/service/workspace.py +453 -1
  27. databricks/sdk/version.py +1 -1
  28. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/METADATA +26 -26
  29. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/RECORD +33 -31
  30. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/WHEEL +1 -1
  31. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/LICENSE +0 -0
  32. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/NOTICE +0 -0
  33. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/top_level.txt +0 -0
databricks/sdk/__init__.py CHANGED
@@ -6,6 +6,7 @@ from databricks.sdk import azure
 from databricks.sdk.credentials_provider import CredentialsStrategy
 from databricks.sdk.mixins.compute import ClustersExt
 from databricks.sdk.mixins.files import DbfsExt
+from databricks.sdk.mixins.jobs import JobsExt
 from databricks.sdk.mixins.open_ai_client import ServingEndpointsExt
 from databricks.sdk.mixins.workspace import WorkspaceExt
 from databricks.sdk.service.apps import AppsAPI
@@ -27,6 +28,9 @@ from databricks.sdk.service.catalog import (AccountMetastoreAssignmentsAPI,
                                             TableConstraintsAPI, TablesAPI,
                                             TemporaryTableCredentialsAPI,
                                             VolumesAPI, WorkspaceBindingsAPI)
+from databricks.sdk.service.cleanrooms import (CleanRoomAssetsAPI,
+                                               CleanRoomsAPI,
+                                               CleanRoomTaskRunsAPI)
 from databricks.sdk.service.compute import (ClusterPoliciesAPI, ClustersAPI,
                                             CommandExecutionAPI,
                                             GlobalInitScriptsAPI,
@@ -175,6 +179,9 @@ class WorkspaceClient:
         self._apps = AppsAPI(self._api_client)
         self._artifact_allowlists = ArtifactAllowlistsAPI(self._api_client)
         self._catalogs = CatalogsAPI(self._api_client)
+        self._clean_room_assets = CleanRoomAssetsAPI(self._api_client)
+        self._clean_room_task_runs = CleanRoomTaskRunsAPI(self._api_client)
+        self._clean_rooms = CleanRoomsAPI(self._api_client)
         self._cluster_policies = ClusterPoliciesAPI(self._api_client)
         self._clusters = ClustersExt(self._api_client)
         self._command_execution = CommandExecutionAPI(self._api_client)
@@ -204,7 +211,7 @@ class WorkspaceClient:
         self._instance_pools = InstancePoolsAPI(self._api_client)
         self._instance_profiles = InstanceProfilesAPI(self._api_client)
         self._ip_access_lists = IpAccessListsAPI(self._api_client)
-        self._jobs = JobsAPI(self._api_client)
+        self._jobs = JobsExt(self._api_client)
         self._lakeview = LakeviewAPI(self._api_client)
         self._libraries = LibrariesAPI(self._api_client)
         self._metastores = MetastoresAPI(self._api_client)
@@ -304,6 +311,21 @@ class WorkspaceClient:
         """A catalog is the first layer of Unity Catalog’s three-level namespace."""
         return self._catalogs

+    @property
+    def clean_room_assets(self) -> CleanRoomAssetsAPI:
+        """Clean room assets are data and code objects — Tables, volumes, and notebooks that are shared with the clean room."""
+        return self._clean_room_assets
+
+    @property
+    def clean_room_task_runs(self) -> CleanRoomTaskRunsAPI:
+        """Clean room task runs are the executions of notebooks in a clean room."""
+        return self._clean_room_task_runs
+
+    @property
+    def clean_rooms(self) -> CleanRoomsAPI:
+        """A clean room uses Delta Sharing and serverless compute to provide a secure and privacy-protecting environment where multiple parties can work together on sensitive enterprise data without direct access to each other’s data."""
+        return self._clean_rooms
+
     @property
     def cluster_policies(self) -> ClusterPoliciesAPI:
         """You can use cluster policies to control users' ability to configure clusters based on a set of rules."""
@@ -450,7 +472,7 @@ class WorkspaceClient:
         return self._ip_access_lists

     @property
-    def jobs(self) -> JobsAPI:
+    def jobs(self) -> JobsExt:
         """The Jobs API allows you to create, edit, and delete jobs."""
         return self._jobs

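Taken together, the `__init__.py` changes surface three new clean rooms APIs as `WorkspaceClient` properties and silently upgrade `w.jobs` from `JobsAPI` to the `JobsExt` mixin. A minimal usage sketch, assuming the generated clean rooms service follows the SDK's usual `list()` conventions (the loop and run ID below are illustrative, not taken from this diff):

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # New in this release: clean rooms are exposed as client properties.
    for room in w.clean_rooms.list():
        print(room.name)

    # w.jobs is now a JobsExt, but JobsExt subclasses JobsAPI, so existing
    # code that calls or type-hints JobsAPI keeps working unchanged.
    run = w.jobs.get_run(run_id=1234)  # hypothetical run ID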
databricks/sdk/_base_client.py CHANGED
@@ -1,3 +1,4 @@
+import io
 import logging
 import urllib.parse
 from datetime import timedelta
@@ -50,7 +51,8 @@ class _BaseClient:
                  http_timeout_seconds: float = None,
                  extra_error_customizers: List[_ErrorCustomizer] = None,
                  debug_headers: bool = False,
-                 clock: Clock = None):
+                 clock: Clock = None,
+                 streaming_buffer_size: int = 1024 * 1024):  # 1MB
         """
         :param debug_truncate_bytes:
         :param retry_timeout_seconds:
@@ -68,6 +70,7 @@ class _BaseClient:
         :param extra_error_customizers:
         :param debug_headers: Whether to include debug headers in the request log.
         :param clock: Clock object to use for time-related operations.
+        :param streaming_buffer_size: The size of the buffer to use for streaming responses.
         """

         self._debug_truncate_bytes = debug_truncate_bytes or 96
@@ -78,6 +81,7 @@ class _BaseClient:
         self._clock = clock or RealClock()
         self._session = requests.Session()
         self._session.auth = self._authenticate
+        self._streaming_buffer_size = streaming_buffer_size

         # We don't use `max_retries` from HTTPAdapter to align with a more production-ready
         # retry strategy established in the Databricks SDK for Go. See _is_retryable and
@@ -127,6 +131,14 @@ class _BaseClient:
         flattened = dict(flatten_dict(with_fixed_bools))
         return flattened

+    @staticmethod
+    def _is_seekable_stream(data) -> bool:
+        if data is None:
+            return False
+        if not isinstance(data, io.IOBase):
+            return False
+        return data.seekable()
+
     def do(self,
            method: str,
            url: str,
@@ -141,24 +153,39 @@ class _BaseClient:
         if headers is None:
             headers = {}
         headers['User-Agent'] = self._user_agent_base
-        retryable = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
-                            is_retryable=self._is_retryable,
-                            clock=self._clock)
-        response = retryable(self._perform)(method,
-                                            url,
-                                            query=query,
-                                            headers=headers,
-                                            body=body,
-                                            raw=raw,
-                                            files=files,
-                                            data=data,
-                                            auth=auth)
+
+        # Wrap strings and bytes in a seekable stream so that we can rewind them.
+        if isinstance(data, (str, bytes)):
+            data = io.BytesIO(data.encode('utf-8') if isinstance(data, str) else data)
+
+        # Only retry if the request is not a stream or if the stream is seekable and
+        # we can rewind it. This is necessary to avoid bugs where the retry doesn't
+        # re-read already read data from the body.
+        if data is not None and not self._is_seekable_stream(data):
+            logger.debug(f"Retry disabled for non-seekable stream: type={type(data)}")
+            call = self._perform
+        else:
+            call = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
+                           is_retryable=self._is_retryable,
+                           clock=self._clock)(self._perform)
+
+        response = call(method,
+                        url,
+                        query=query,
+                        headers=headers,
+                        body=body,
+                        raw=raw,
+                        files=files,
+                        data=data,
+                        auth=auth)

         resp = dict()
         for header in response_headers if response_headers else []:
             resp[header] = response.headers.get(Casing.to_header_case(header))
         if raw:
-            resp["contents"] = _StreamingResponse(response)
+            streaming_response = _StreamingResponse(response)
+            streaming_response.set_chunk_size(self._streaming_buffer_size)
+            resp["contents"] = streaming_response
             return resp
         if not len(response.content):
             return resp
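The retry gate above hinges entirely on `_is_seekable_stream`. A standalone sketch of the same predicate, mirroring the private helper rather than importing it, shows which request bodies keep retries enabled:

    import io

    def is_seekable_stream(data) -> bool:
        # Mirrors _BaseClient._is_seekable_stream: only file-like objects
        # that support seek() can be safely rewound and re-read on retry.
        return isinstance(data, io.IOBase) and data.seekable()

    assert is_seekable_stream(io.BytesIO(b"payload"))  # rewindable: retried
    assert not is_seekable_stream(None)                # no body
    assert not is_seekable_stream(iter([b"chunk"]))    # iterator body: retries disabled

    # Note: str and bytes bodies never hit the non-seekable branch, because
    # do() wraps them in io.BytesIO before this check runs.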
@@ -221,6 +248,12 @@ class _BaseClient:
                  files=None,
                  data=None,
                  auth: Callable[[requests.PreparedRequest], requests.PreparedRequest] = None):
+        # Keep track of the initial position of the stream so that we can rewind it if
+        # we need to retry the request.
+        initial_data_position = 0
+        if self._is_seekable_stream(data):
+            initial_data_position = data.tell()
+
         response = self._session.request(method,
                                          url,
                                          params=self._fix_query_string(query),
@@ -232,9 +265,18 @@ class _BaseClient:
                                          stream=raw,
                                          timeout=self._http_timeout_seconds)
         self._record_request_log(response, raw=raw or data is not None or files is not None)
+
         error = self._error_parser.get_api_error(response)
         if error is not None:
+            # If the request body is a seekable stream, rewind it so that it is ready
+            # to be read again in case of a retry.
+            #
+            # TODO: This should be moved into a "before-retry" hook to avoid one
+            # unnecessary seek on the last failed retry before aborting.
+            if self._is_seekable_stream(data):
+                data.seek(initial_data_position)
             raise error from None
+
         return response

     def _record_request_log(self, response: requests.Response, raw: bool = False) -> None:
@@ -283,6 +325,11 @@ class _StreamingResponse(BinaryIO):
         return False

     def read(self, n: int = -1) -> bytes:
+        """
+        Read up to n bytes from the response stream. If n is negative, read
+        until the end of the stream.
+        """
+
         self._open()
         read_everything = n < 0
         remaining_bytes = n
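For consumers of raw responses, the visible effect is that `_StreamingResponse.read()` now buffers in 1 MB chunks by default. A hedged sketch of a chunked download through the files API, which returns a `_StreamingResponse`-backed `contents` object (the volume path and `process` handler are hypothetical):

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    resp = w.files.download("/Volumes/main/default/vol/data.bin")  # hypothetical path
    f = resp.contents  # file-like, backed by _StreamingResponse
    while chunk := f.read(1024 * 1024):  # read in buffer-sized chunks
        process(chunk)  # hypothetical handler
    f.close()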
databricks/sdk/config.py CHANGED
@@ -92,15 +92,16 @@ class Config:
     max_connections_per_pool: int = ConfigAttribute()
     databricks_environment: Optional[DatabricksEnvironment] = None

-    def __init__(self,
-                 *,
-                 # Deprecated. Use credentials_strategy instead.
-                 credentials_provider: Optional[CredentialsStrategy] = None,
-                 credentials_strategy: Optional[CredentialsStrategy] = None,
-                 product=None,
-                 product_version=None,
-                 clock: Optional[Clock] = None,
-                 **kwargs):
+    def __init__(
+            self,
+            *,
+            # Deprecated. Use credentials_strategy instead.
+            credentials_provider: Optional[CredentialsStrategy] = None,
+            credentials_strategy: Optional[CredentialsStrategy] = None,
+            product=None,
+            product_version=None,
+            clock: Optional[Clock] = None,
+            **kwargs):
         self._header_factory = None
         self._inner = {}
         self._user_agent_other_info = []
databricks/sdk/credentials_provider.py CHANGED
@@ -304,11 +304,12 @@ def github_oidc_azure(cfg: 'Config') -> Optional[CredentialsProvider]:
     # detect Azure AD Tenant ID if it's not specified directly
     token_endpoint = cfg.oidc_endpoints.token_endpoint
     cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0]
-    inner = ClientCredentials(client_id=cfg.azure_client_id,
-                              client_secret="",  # we have no (rotatable) secrets in OIDC flow
-                              token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
-                              endpoint_params=params,
-                              use_params=True)
+    inner = ClientCredentials(
+        client_id=cfg.azure_client_id,
+        client_secret="",  # we have no (rotatable) secrets in OIDC flow
+        token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
+        endpoint_params=params,
+        use_params=True)

     def refreshed_headers() -> Dict[str, str]:
         token = inner.token()
databricks/sdk/mixins/jobs.py ADDED
@@ -0,0 +1,49 @@
+from typing import Optional
+
+from databricks.sdk.service import jobs
+
+
+class JobsExt(jobs.JobsAPI):
+
+    def get_run(self,
+                run_id: int,
+                *,
+                include_history: Optional[bool] = None,
+                include_resolved_values: Optional[bool] = None,
+                page_token: Optional[str] = None) -> jobs.Run:
+        """
+        This method fetches the details of a run identified by `run_id`. If the run has multiple pages of tasks or iterations,
+        it will paginate through all pages and aggregate the results.
+        :param run_id: int
+          The canonical identifier of the run for which to retrieve the metadata. This field is required.
+        :param include_history: bool (optional)
+          Whether to include the repair history in the response.
+        :param include_resolved_values: bool (optional)
+          Whether to include resolved parameter values in the response.
+        :param page_token: str (optional)
+          To list the next page or the previous page of job tasks, set this field to the value of the
+          `next_page_token` or `prev_page_token` returned in the GetJob response.
+        :returns: :class:`Run`
+        """
+        run = super().get_run(run_id,
+                              include_history=include_history,
+                              include_resolved_values=include_resolved_values,
+                              page_token=page_token)
+
+        # When querying a Job run, a page token is returned when there are more than 100 tasks. No iterations are defined for a Job run. Therefore, the next page in the response only includes the next page of tasks.
+        # When querying a ForEach task run, a page token is returned when there are more than 100 iterations. Only a single task is returned, corresponding to the ForEach task itself. Therefore, the client only reads the iterations from the next page and not the tasks.
+        is_paginating_iterations = run.iterations is not None and len(run.iterations) > 0
+
+        while run.next_page_token is not None:
+            next_run = super().get_run(run_id,
+                                       include_history=include_history,
+                                       include_resolved_values=include_resolved_values,
+                                       page_token=run.next_page_token)
+            if is_paginating_iterations:
+                run.iterations.extend(next_run.iterations)
+            else:
+                run.tasks.extend(next_run.tasks)
+            run.next_page_token = next_run.next_page_token
+
+        run.prev_page_token = None
+        return run
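For callers, this mixin makes pagination invisible: a single `get_run` call follows `next_page_token` internally and returns a fully aggregated `Run`, whether the pages hold tasks or ForEach iterations. A short usage sketch, with an illustrative run ID:

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # One call, regardless of how many 100-item pages the run spans.
    run = w.jobs.get_run(run_id=1234, include_history=True)  # hypothetical run ID
    print(f"aggregated {len(run.tasks or [])} tasks")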
databricks/sdk/mixins/open_ai_client.py CHANGED
@@ -29,7 +29,7 @@ class ServingEndpointsExt(ServingEndpointsAPI):
             from openai import OpenAI
         except Exception:
             raise ImportError(
-                "Open AI is not installed. Please install the Databricks SDK with the following command `pip isntall databricks-sdk[openai]`"
+                "Open AI is not installed. Please install the Databricks SDK with the following command `pip install databricks-sdk[openai]`"
             )

         return OpenAI(
@@ -42,7 +42,7 @@ class ServingEndpointsExt(ServingEndpointsAPI):
             from langchain_openai import ChatOpenAI
         except Exception:
             raise ImportError(
-                "Langchain Open AI is not installed. Please install the Databricks SDK with the following command `pip isntall databricks-sdk[openai]` and ensure you are using python>3.7"
+                "Langchain Open AI is not installed. Please install the Databricks SDK with the following command `pip install databricks-sdk[openai]` and ensure you are using python>3.7"
             )

         return ChatOpenAI(
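Both fixes correct the install hint from `pip isntall` to `pip install`. A hedged sketch of the happy path these errors guard, assuming the mixin's `get_open_ai_client()` accessor (the method name is not shown in this diff, and the endpoint name is illustrative):

    # pip install 'databricks-sdk[openai]'
    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()
    client = w.serving_endpoints.get_open_ai_client()

    # A standard OpenAI client, pre-configured to call the workspace's
    # serving endpoints with Databricks authentication.
    resp = client.chat.completions.create(
        model="databricks-meta-llama-3-1-70b-instruct",  # hypothetical endpoint name
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(resp.choices[0].message.content)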