PyPI - databricks-sdk - Versions diffs - 0.37.0__py3-none-any.whl → 0.38.0__py3-none-any.whl - Mend

databricks-sdk 0.37.0__py3-none-any.whl → 0.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of databricks-sdk might be problematic. Click here for more details.

Files changed (24)

databricks/sdk/__init__.py +3 -2
databricks/sdk/_base_client.py +61 -14
databricks/sdk/config.py +10 -9
databricks/sdk/credentials_provider.py +6 -5
databricks/sdk/mixins/jobs.py +49 -0
databricks/sdk/service/apps.py +10 -4
databricks/sdk/service/billing.py +1 -1
databricks/sdk/service/catalog.py +196 -32
databricks/sdk/service/dashboards.py +10 -10
databricks/sdk/service/iam.py +2 -2
databricks/sdk/service/jobs.py +17 -8
databricks/sdk/service/oauth2.py +1 -0
databricks/sdk/service/pipelines.py +82 -15
databricks/sdk/service/provisioning.py +15 -0
databricks/sdk/service/settings.py +3 -1
databricks/sdk/service/sharing.py +2 -0
databricks/sdk/service/workspace.py +2 -1
databricks/sdk/version.py +1 -1
{databricks_sdk-0.37.0.dist-info → databricks_sdk-0.38.0.dist-info}/METADATA +1 -1
{databricks_sdk-0.37.0.dist-info → databricks_sdk-0.38.0.dist-info}/RECORD +24 -23
{databricks_sdk-0.37.0.dist-info → databricks_sdk-0.38.0.dist-info}/LICENSE +0 -0
{databricks_sdk-0.37.0.dist-info → databricks_sdk-0.38.0.dist-info}/NOTICE +0 -0
{databricks_sdk-0.37.0.dist-info → databricks_sdk-0.38.0.dist-info}/WHEEL +0 -0
{databricks_sdk-0.37.0.dist-info → databricks_sdk-0.38.0.dist-info}/top_level.txt +0 -0

databricks/sdk/__init__.py CHANGED Viewed

@@ -6,6 +6,7 @@ from databricks.sdk import azure
 from databricks.sdk.credentials_provider import CredentialsStrategy
 from databricks.sdk.mixins.compute import ClustersExt
 from databricks.sdk.mixins.files import DbfsExt
+from databricks.sdk.mixins.jobs import JobsExt
 from databricks.sdk.mixins.open_ai_client import ServingEndpointsExt
 from databricks.sdk.mixins.workspace import WorkspaceExt
 from databricks.sdk.service.apps import AppsAPI
@@ -204,7 +205,7 @@ class WorkspaceClient:
         self._instance_pools = InstancePoolsAPI(self._api_client)
         self._instance_profiles = InstanceProfilesAPI(self._api_client)
         self._ip_access_lists = IpAccessListsAPI(self._api_client)
-        self._jobs = JobsAPI(self._api_client)
+        self._jobs = JobsExt(self._api_client)
         self._lakeview = LakeviewAPI(self._api_client)
         self._libraries = LibrariesAPI(self._api_client)
         self._metastores = MetastoresAPI(self._api_client)
@@ -450,7 +451,7 @@ class WorkspaceClient:
         return self._ip_access_lists
     @property
-    def jobs(self) -> JobsAPI:
+    def jobs(self) -> JobsExt:
         """The Jobs API allows you to create, edit, and delete jobs."""
         return self._jobs

databricks/sdk/_base_client.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import io
 import logging
 import urllib.parse
 from datetime import timedelta
@@ -50,7 +51,8 @@ class _BaseClient:
                  http_timeout_seconds: float = None,
                  extra_error_customizers: List[_ErrorCustomizer] = None,
                  debug_headers: bool = False,
-                 clock: Clock = None):
+                 clock: Clock = None,
+                 streaming_buffer_size: int = 1024 * 1024): # 1MB
         """
         :param debug_truncate_bytes:
         :param retry_timeout_seconds:
@@ -68,6 +70,7 @@ class _BaseClient:
         :param extra_error_customizers:
         :param debug_headers: Whether to include debug headers in the request log.
         :param clock: Clock object to use for time-related operations.
+        :param streaming_buffer_size: The size of the buffer to use for streaming responses.
         """
         self._debug_truncate_bytes = debug_truncate_bytes or 96
@@ -78,6 +81,7 @@ class _BaseClient:
         self._clock = clock or RealClock()
         self._session = requests.Session()
         self._session.auth = self._authenticate
+        self._streaming_buffer_size = streaming_buffer_size
         # We don't use `max_retries` from HTTPAdapter to align with a more production-ready
         # retry strategy established in the Databricks SDK for Go. See _is_retryable and
@@ -127,6 +131,14 @@ class _BaseClient:
         flattened = dict(flatten_dict(with_fixed_bools))
         return flattened
+    @staticmethod
+    def _is_seekable_stream(data) -> bool:
+        if data is None:
+            return False
+        if not isinstance(data, io.IOBase):
+            return False
+        return data.seekable()
     def do(self,
            method: str,
            url: str,
@@ -141,24 +153,39 @@ class _BaseClient:
         if headers is None:
             headers = {}
         headers['User-Agent'] = self._user_agent_base
-        retryable = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
-                            is_retryable=self._is_retryable,
-                            clock=self._clock)
-        response = retryable(self._perform)(method,
-                                            url,
-                                            query=query,
-                                            headers=headers,
-                                            body=body,
-                                            raw=raw,
-                                            files=files,
-                                            data=data,
-                                            auth=auth)
+        # Wrap strings and bytes in a seekable stream so that we can rewind them.
+        if isinstance(data, (str, bytes)):
+            data = io.BytesIO(data.encode('utf-8') if isinstance(data, str) else data)
+        # Only retry if the request is not a stream or if the stream is seekable and
+        # we can rewind it. This is necessary to avoid bugs where the retry doesn't
+        # re-read already read data from the body.
+        if data is not None and not self._is_seekable_stream(data):
+            logger.debug(f"Retry disabled for non-seekable stream: type={type(data)}")
+            call = self._perform
+        else:
+            call = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
+                           is_retryable=self._is_retryable,
+                           clock=self._clock)(self._perform)
+        response = call(method,
+                        url,
+                        query=query,
+                        headers=headers,
+                        body=body,
+                        raw=raw,
+                        files=files,
+                        data=data,
+                        auth=auth)
         resp = dict()
         for header in response_headers if response_headers else []:
             resp[header] = response.headers.get(Casing.to_header_case(header))
         if raw:
-            resp["contents"] = _StreamingResponse(response)
+            streaming_response = _StreamingResponse(response)
+            streaming_response.set_chunk_size(self._streaming_buffer_size)
+            resp["contents"] = streaming_response
             return resp
         if not len(response.content):
             return resp
@@ -221,6 +248,12 @@ class _BaseClient:
                  files=None,
                  data=None,
                  auth: Callable[[requests.PreparedRequest], requests.PreparedRequest] = None):
+        # Keep track of the initial position of the stream so that we can rewind it if
+        # we need to retry the request.
+        initial_data_position = 0
+        if self._is_seekable_stream(data):
+            initial_data_position = data.tell()
         response = self._session.request(method,
                                          url,
                                          params=self._fix_query_string(query),
@@ -232,9 +265,18 @@ class _BaseClient:
                                          stream=raw,
                                          timeout=self._http_timeout_seconds)
         self._record_request_log(response, raw=raw or data is not None or files is not None)
         error = self._error_parser.get_api_error(response)
         if error is not None:
+            # If the request body is a seekable stream, rewind it so that it is ready
+            # to be read again in case of a retry.
+            #
+            # TODO: This should be moved into a "before-retry" hook to avoid one
+            # unnecessary seek on the last failed retry before aborting.
+            if self._is_seekable_stream(data):
+                data.seek(initial_data_position)
             raise error from None
         return response
     def _record_request_log(self, response: requests.Response, raw: bool = False) -> None:
@@ -283,6 +325,11 @@ class _StreamingResponse(BinaryIO):
         return False
     def read(self, n: int = -1) -> bytes:
+        """
+        Read up to n bytes from the response stream. If n is negative, read
+        until the end of the stream.
+        """
         self._open()
         read_everything = n < 0
         remaining_bytes = n

databricks/sdk/config.py CHANGED Viewed

@@ -92,15 +92,16 @@ class Config:
     max_connections_per_pool: int = ConfigAttribute()
     databricks_environment: Optional[DatabricksEnvironment] = None
-    def __init__(self,
-                 *,
-                 # Deprecated. Use credentials_strategy instead.
-                 credentials_provider: Optional[CredentialsStrategy] = None,
-                 credentials_strategy: Optional[CredentialsStrategy] = None,
-                 product=None,
-                 product_version=None,
-                 clock: Optional[Clock] = None,
-                 **kwargs):
+    def __init__(
+            self,
+            *,
+            # Deprecated. Use credentials_strategy instead.
+            credentials_provider: Optional[CredentialsStrategy] = None,
+            credentials_strategy: Optional[CredentialsStrategy] = None,
+            product=None,
+            product_version=None,
+            clock: Optional[Clock] = None,
+            **kwargs):
         self._header_factory = None
         self._inner = {}
         self._user_agent_other_info = []

databricks/sdk/credentials_provider.py CHANGED Viewed

@@ -304,11 +304,12 @@ def github_oidc_azure(cfg: 'Config') -> Optional[CredentialsProvider]:
         # detect Azure AD Tenant ID if it's not specified directly
         token_endpoint = cfg.oidc_endpoints.token_endpoint
         cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0]
-    inner = ClientCredentials(client_id=cfg.azure_client_id,
-                              client_secret="", # we have no (rotatable) secrets in OIDC flow
-                              token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
-                              endpoint_params=params,
-                              use_params=True)
+    inner = ClientCredentials(
+        client_id=cfg.azure_client_id,
+        client_secret="", # we have no (rotatable) secrets in OIDC flow
+        token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
+        endpoint_params=params,
+        use_params=True)
     def refreshed_headers() -> Dict[str, str]:
         token = inner.token()

databricks/sdk/mixins/jobs.py ADDED Viewed

@@ -0,0 +1,49 @@
+from typing import Optional
+from databricks.sdk.service import jobs
+class JobsExt(jobs.JobsAPI):
+    def get_run(self,
+                run_id: int,
+                *,
+                include_history: Optional[bool] = None,
+                include_resolved_values: Optional[bool] = None,
+                page_token: Optional[str] = None) -> jobs.Run:
+        """
+        This method fetches the details of a run identified by `run_id`. If the run has multiple pages of tasks or iterations,
+        it will paginate through all pages and aggregate the results.
+        :param run_id: int
+          The canonical identifier of the run for which to retrieve the metadata. This field is required.
+        :param include_history: bool (optional)
+          Whether to include the repair history in the response.
+        :param include_resolved_values: bool (optional)
+          Whether to include resolved parameter values in the response.
+        :param page_token: str (optional)
+          To list the next page or the previous page of job tasks, set this field to the value of the
+          `next_page_token` or `prev_page_token` returned in the GetJob response.
+        :returns: :class:`Run`
+        """
+        run = super().get_run(run_id,
+                              include_history=include_history,
+                              include_resolved_values=include_resolved_values,
+                              page_token=page_token)
+        # When querying a Job run, a page token is returned when there are more than 100 tasks. No iterations are defined for a Job run. Therefore, the next page in the response only includes the next page of tasks.
+        # When querying a ForEach task run, a page token is returned when there are more than 100 iterations. Only a single task is returned, corresponding to the ForEach task itself. Therefore, the client only reads the iterations from the next page and not the tasks.
+        is_paginating_iterations = run.iterations is not None and len(run.iterations) > 0
+        while run.next_page_token is not None:
+            next_run = super().get_run(run_id,
+                                       include_history=include_history,
+                                       include_resolved_values=include_resolved_values,
+                                       page_token=run.next_page_token)
+            if is_paginating_iterations:
+                run.iterations.extend(next_run.iterations)
+            else:
+                run.tasks.extend(next_run.tasks)
+            run.next_page_token = next_run.next_page_token
+        run.prev_page_token = None
+        return run

databricks/sdk/service/apps.py CHANGED Viewed

@@ -52,6 +52,8 @@ class App:
     resources: Optional[List[AppResource]] = None
     """Resources for the app."""
+    service_principal_client_id: Optional[str] = None
     service_principal_id: Optional[int] = None
     service_principal_name: Optional[str] = None
@@ -79,6 +81,8 @@ class App:
         if self.name is not None: body['name'] = self.name
         if self.pending_deployment: body['pending_deployment'] = self.pending_deployment.as_dict()
         if self.resources: body['resources'] = [v.as_dict() for v in self.resources]
+        if self.service_principal_client_id is not None:
+            body['service_principal_client_id'] = self.service_principal_client_id
         if self.service_principal_id is not None: body['service_principal_id'] = self.service_principal_id
         if self.service_principal_name is not None:
             body['service_principal_name'] = self.service_principal_name
@@ -100,6 +104,7 @@ class App:
                    name=d.get('name', None),
                    pending_deployment=_from_dict(d, 'pending_deployment', AppDeployment),
                    resources=_repeated_dict(d, 'resources', AppResource),
+                   service_principal_client_id=d.get('service_principal_client_id', None),
                    service_principal_id=d.get('service_principal_id', None),
                    service_principal_name=d.get('service_principal_name', None),
                    update_time=d.get('update_time', None),
@@ -798,7 +803,7 @@ class AppsAPI:
           Long-running operation waiter for :class:`App`.
           See :method:wait_get_app_active for more details.
         """
-        body = app
+        body = app.as_dict()
         headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
         op_response = self._api.do('POST', '/api/2.0/apps', body=body, headers=headers)
@@ -836,7 +841,7 @@ class AppsAPI:
           Long-running operation waiter for :class:`AppDeployment`.
           See :method:wait_get_deployment_app_succeeded for more details.
         """
-        body = app_deployment
+        body = app_deployment.as_dict()
         headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
         op_response = self._api.do('POST',
@@ -1053,12 +1058,13 @@ class AppsAPI:
         Updates the app with the supplied name.
         :param name: str
-          The name of the app.
+          The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. It
+          must be unique within the workspace.
         :param app: :class:`App` (optional)
         :returns: :class:`App`
         """
-        body = app
+        body = app.as_dict()
         headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
         res = self._api.do('PATCH', f'/api/2.0/apps/{name}', body=body, headers=headers)

databricks/sdk/service/billing.py CHANGED Viewed

@@ -1121,7 +1121,7 @@ class BudgetsAPI:
         Gets a budget configuration for an account. Both account and budget configuration are specified by ID.
         :param budget_id: str
-          The Databricks budget configuration ID.
+          The budget configuration ID
         :returns: :class:`GetBudgetConfigurationResponse`
         """

databricks-sdk 0.37.0__py3-none-any.whl → 0.38.0__py3-none-any.whl

Potentially problematic release.

databricks-sdk 0.37.0__py3-none-any.whl → 0.38.0__py3-none-any.whl