databricks-sdk 0.67.0__py3-none-any.whl → 0.69.0__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in that registry.
Potentially problematic release.
This version of databricks-sdk might be problematic.
- databricks/sdk/__init__.py +14 -10
- databricks/sdk/_base_client.py +4 -1
- databricks/sdk/common/lro.py +17 -0
- databricks/sdk/common/types/__init__.py +0 -0
- databricks/sdk/common/types/fieldmask.py +39 -0
- databricks/sdk/config.py +62 -14
- databricks/sdk/credentials_provider.py +61 -12
- databricks/sdk/dbutils.py +5 -1
- databricks/sdk/errors/parser.py +8 -3
- databricks/sdk/mixins/files.py +1156 -111
- databricks/sdk/mixins/files_utils.py +293 -0
- databricks/sdk/oidc_token_supplier.py +80 -0
- databricks/sdk/retries.py +102 -2
- databricks/sdk/service/_internal.py +93 -1
- databricks/sdk/service/agentbricks.py +1 -1
- databricks/sdk/service/apps.py +264 -1
- databricks/sdk/service/billing.py +2 -3
- databricks/sdk/service/catalog.py +1026 -540
- databricks/sdk/service/cleanrooms.py +3 -3
- databricks/sdk/service/compute.py +21 -33
- databricks/sdk/service/dashboards.py +7 -3
- databricks/sdk/service/database.py +3 -2
- databricks/sdk/service/dataquality.py +1145 -0
- databricks/sdk/service/files.py +2 -1
- databricks/sdk/service/iam.py +2 -1
- databricks/sdk/service/iamv2.py +1 -1
- databricks/sdk/service/jobs.py +6 -9
- databricks/sdk/service/marketplace.py +3 -1
- databricks/sdk/service/ml.py +3 -1
- databricks/sdk/service/oauth2.py +1 -1
- databricks/sdk/service/pipelines.py +5 -6
- databricks/sdk/service/provisioning.py +544 -655
- databricks/sdk/service/qualitymonitorv2.py +1 -1
- databricks/sdk/service/serving.py +3 -1
- databricks/sdk/service/settings.py +5 -2
- databricks/sdk/service/settingsv2.py +1 -1
- databricks/sdk/service/sharing.py +12 -3
- databricks/sdk/service/sql.py +305 -70
- databricks/sdk/service/tags.py +1 -1
- databricks/sdk/service/vectorsearch.py +3 -1
- databricks/sdk/service/workspace.py +70 -17
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/METADATA +4 -2
- databricks_sdk-0.69.0.dist-info/RECORD +84 -0
- databricks_sdk-0.67.0.dist-info/RECORD +0 -79
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/WHEEL +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/licenses/LICENSE +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/licenses/NOTICE +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/top_level.txt +0 -0
databricks/sdk/__init__.py
CHANGED
@@ -21,6 +21,7 @@ from databricks.sdk.service import cleanrooms as pkg_cleanrooms
 from databricks.sdk.service import compute as pkg_compute
 from databricks.sdk.service import dashboards as pkg_dashboards
 from databricks.sdk.service import database as pkg_database
+from databricks.sdk.service import dataquality as pkg_dataquality
 from databricks.sdk.service import files as pkg_files
 from databricks.sdk.service import iam as pkg_iam
 from databricks.sdk.service import iamv2 as pkg_iamv2
@@ -79,6 +80,7 @@ from databricks.sdk.service.compute import (ClusterPoliciesAPI, ClustersAPI,
 from databricks.sdk.service.dashboards import (GenieAPI, LakeviewAPI,
                                                LakeviewEmbeddedAPI)
 from databricks.sdk.service.database import DatabaseAPI
+from databricks.sdk.service.dataquality import DataQualityAPI
 from databricks.sdk.service.files import DbfsAPI, FilesAPI
 from databricks.sdk.service.iam import (AccessControlAPI,
                                         AccountAccessControlAPI,
@@ -179,11 +181,7 @@ def _make_dbutils(config: client.Config):


 def _make_files_client(apiClient: client.ApiClient, config: client.Config):
-
-        _LOG.info("Experimental Files API client is enabled")
-        return FilesExt(apiClient, config)
-    else:
-        return FilesAPI(apiClient)
+    return FilesExt(apiClient, config)


 class WorkspaceClient:
@@ -282,6 +280,7 @@ class WorkspaceClient:
         self._current_user = pkg_iam.CurrentUserAPI(self._api_client)
         self._dashboard_widgets = pkg_sql.DashboardWidgetsAPI(self._api_client)
         self._dashboards = pkg_sql.DashboardsAPI(self._api_client)
+        self._data_quality = pkg_dataquality.DataQualityAPI(self._api_client)
         self._data_sources = pkg_sql.DataSourcesAPI(self._api_client)
         self._database = pkg_database.DatabaseAPI(self._api_client)
         self._dbfs = DbfsExt(self._api_client)
@@ -540,6 +539,11 @@ class WorkspaceClient:
         """In general, there is little need to modify dashboards using the API."""
         return self._dashboards

+    @property
+    def data_quality(self) -> pkg_dataquality.DataQualityAPI:
+        """Manage the data quality of Unity Catalog objects (currently support `schema` and `table`)."""
+        return self._data_quality
+
     @property
     def data_sources(self) -> pkg_sql.DataSourcesAPI:
         """This API is provided to assist you in making new query objects."""
@@ -595,11 +599,6 @@
         """A feature store is a centralized repository that enables data scientists to find and share features."""
         return self._feature_store

-    @property
-    def files(self) -> pkg_files.FilesAPI:
-        """The Files API is a standard HTTP API that allows you to read, write, list, and delete files and directories by referring to their URI."""
-        return self._files
-
     @property
     def functions(self) -> pkg_catalog.FunctionsAPI:
         """Functions implement User-Defined Functions (UDFs) in Unity Catalog."""
@@ -1005,6 +1004,11 @@
         """User identities recognized by Databricks and represented by email addresses."""
         return self._users

+    @property
+    def files(self) -> FilesExt:
+        """The Files API is a standard HTTP API that allows you to read, write, list, and delete files and directories by referring to their URI."""
+        return self._files
+
     def get_workspace_id(self) -> int:
         """Get the workspace ID of the workspace that this client is connected to."""
         response = self._api_client.do("GET", "/api/2.0/preview/scim/v2/Me", response_headers=["X-Databricks-Org-Id"])
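Taken together, these hunks make the workspace client always construct the extended Files client (the FilesAPI fallback is gone) and add the new data-quality service. A minimal sketch of what callers see, assuming workspace authentication is already configured in the environment:

from databricks.sdk import WorkspaceClient
from databricks.sdk.mixins.files import FilesExt

w = WorkspaceClient()

# files is now always the extended client with multipart upload/download support.
assert isinstance(w.files, FilesExt)

# The new Unity Catalog data-quality service is exposed as a property.
dq = w.data_quality  # databricks.sdk.service.dataquality.DataQualityAPI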
databricks/sdk/_base_client.py
CHANGED
@@ -99,7 +99,10 @@ class _BaseClient:
         # Default to 60 seconds
         self._http_timeout_seconds = http_timeout_seconds or 60

-        self._error_parser = _Parser(
+        self._error_parser = _Parser(
+            extra_error_customizers=extra_error_customizers,
+            debug_headers=debug_headers,
+        )

     def _authenticate(self, r: requests.PreparedRequest) -> requests.PreparedRequest:
         if self._header_factory:
databricks/sdk/common/lro.py
ADDED
@@ -0,0 +1,17 @@
+from datetime import timedelta
+from typing import Optional
+
+
+class LroOptions:
+    """LroOptions is the options for the Long Running Operations.
+    DO NOT USE THIS OPTION. This option is still under development
+    and can be updated in the future without notice.
+    """
+
+    def __init__(self, *, timeout: Optional[timedelta] = None):
+        """
+        Args:
+            timeout: The timeout for the Long Running Operations.
+                If not set, the default timeout is 20 minutes.
+        """
+        self.timeout = timeout or timedelta(minutes=20)
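The class is small enough to show end to end. A usage sketch (the import path simply mirrors the new file location; the docstring above makes clear the option is not yet meant for external use):

from datetime import timedelta

from databricks.sdk.common.lro import LroOptions

# An explicit timeout for a long-running operation.
opts = LroOptions(timeout=timedelta(minutes=5))
print(opts.timeout)          # 0:05:00

# Omitting the timeout falls back to the documented 20-minute default.
print(LroOptions().timeout)  # 0:20:00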
databricks/sdk/common/types/__init__.py
File without changes
databricks/sdk/common/types/fieldmask.py
ADDED
@@ -0,0 +1,39 @@
+class FieldMask(object):
+    """Class for FieldMask message type."""
+
+    # This is based on the base implementation from protobuf.
+    # https://pigweed.googlesource.com/third_party/github/protocolbuffers/protobuf/+/HEAD/python/google/protobuf/internal/field_mask.py
+    # The original implementation only works with proto generated classes.
+    # Since our classes are not generated from proto files, we need to implement it manually.
+
+    def __init__(self, field_mask=None):
+        """Initializes the FieldMask."""
+        if field_mask:
+            self.paths = field_mask
+
+    def ToJsonString(self) -> str:
+        """Converts FieldMask to string."""
+        return ",".join(self.paths)
+
+    def FromJsonString(self, value: str) -> None:
+        """Converts string to FieldMask."""
+        if not isinstance(value, str):
+            raise ValueError("FieldMask JSON value not a string: {!r}".format(value))
+        if value:
+            self.paths = value.split(",")
+        else:
+            self.paths = []
+
+    def __eq__(self, other) -> bool:
+        """Check equality based on paths."""
+        if not isinstance(other, FieldMask):
+            return False
+        return self.paths == other.paths
+
+    def __hash__(self) -> int:
+        """Hash based on paths tuple."""
+        return hash(tuple(self.paths))
+
+    def __repr__(self) -> str:
+        """String representation for debugging."""
+        return f"FieldMask(paths={self.paths})"
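FieldMask mirrors the protobuf helper but only deals in comma-separated path strings, so a short round trip covers both conversion methods (the path names below are made up for illustration):

from databricks.sdk.common.types.fieldmask import FieldMask

# Build a mask from a list of field paths and serialize it for a request.
mask = FieldMask(["display_name", "schedule.cron"])
assert mask.ToJsonString() == "display_name,schedule.cron"

# Parse a mask received as a string; the result compares equal path-for-path.
parsed = FieldMask()
parsed.FromJsonString("display_name,schedule.cron")
assert parsed == mask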
databricks/sdk/config.py
CHANGED
@@ -6,7 +6,7 @@ import os
 import pathlib
 import sys
 import urllib.parse
-from typing import Dict, Iterable, Optional
+from typing import Dict, Iterable, List, Optional

 import requests

@@ -110,18 +110,27 @@ class Config:

     disable_async_token_refresh: bool = ConfigAttribute(env="DATABRICKS_DISABLE_ASYNC_TOKEN_REFRESH")

-
-
-
+    disable_experimental_files_api_client: bool = ConfigAttribute(
+        env="DATABRICKS_DISABLE_EXPERIMENTAL_FILES_API_CLIENT"
+    )
+
+    files_ext_client_download_streaming_chunk_size: int = 2 * 1024 * 1024  # 2 MiB
+
+    # When downloading a file, the maximum number of attempts to retry downloading the whole file. Default is no limit.
+    files_ext_client_download_max_total_recovers: Optional[int] = None

-    #
+    # When downloading a file, the maximum number of attempts to retry downloading from the same offset without progressing.
+    # This is to avoid infinite retrying when the download is not making any progress. Default is 1.
+    files_ext_client_download_max_total_recovers_without_progressing = 1
+
+    # File multipart upload/download parameters
     # ----------------------

     # Minimal input stream size (bytes) to use multipart / resumable uploads.
     # For small files it's more efficient to make one single-shot upload request.
     # When uploading a file, SDK will initially buffer this many bytes from input stream.
     # This parameter can be less or bigger than multipart_upload_chunk_size.
-
+    files_ext_multipart_upload_min_stream_size: int = 50 * 1024 * 1024

     # Maximum number of presigned URLs that can be requested at a time.
     #
@@ -131,23 +140,59 @@
     # the stream back. In case of a non-seekable stream we cannot rewind, so we'll abort
     # the upload. To reduce the chance of this, we're requesting presigned URLs one by one
     # and using them immediately.
-
+    files_ext_multipart_upload_batch_url_count: int = 1

-    # Size of the chunk to use for multipart uploads.
+    # Size of the chunk to use for multipart uploads & downloads.
     #
     # The smaller chunk is, the less chance for network errors (or URL get expired),
     # but the more requests we'll make.
     # For AWS, minimum is 5Mb: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
     # For GCP, minimum is 256 KiB (and also recommended multiple is 256 KiB)
     # boto uses 8Mb: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig
-
-
-    #
-
+    files_ext_multipart_upload_default_part_size: int = 10 * 1024 * 1024  # 10 MiB
+
+    # List of multipart upload part sizes that can be automatically selected
+    files_ext_multipart_upload_part_size_options: List[int] = [
+        10 * 1024 * 1024,  # 10 MiB
+        20 * 1024 * 1024,  # 20 MiB
+        50 * 1024 * 1024,  # 50 MiB
+        100 * 1024 * 1024,  # 100 MiB
+        200 * 1024 * 1024,  # 200 MiB
+        500 * 1024 * 1024,  # 500 MiB
+        1 * 1024 * 1024 * 1024,  # 1 GiB
+        2 * 1024 * 1024 * 1024,  # 2 GiB
+        4 * 1024 * 1024 * 1024,  # 4 GiB
+    ]
+
+    # Maximum size of a single part in multipart upload.
+    # For AWS, maximum is 5 GiB: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
+    # For Azure, maximum is 4 GiB: https://learn.microsoft.com/en-us/rest/api/storageservices/put-block
+    # For CloudFlare R2, maximum is 5 GiB: https://developers.cloudflare.com/r2/objects/multipart-objects/
+    files_ext_multipart_upload_max_part_size: int = 4 * 1024 * 1024 * 1024  # 4 GiB
+
+    # Default parallel multipart upload concurrency. Set to 10 because of the experiment results show that it
+    # gives good performance result.
+    files_ext_multipart_upload_default_parallelism: int = 10
+
+    # The expiration duration for presigned URLs used in multipart uploads and downloads.
+    # The client will request new presigned URLs if the previous one is expired. The duration should be long enough
+    # to complete the upload or download of a single part.
+    files_ext_multipart_upload_url_expiration_duration: datetime.timedelta = datetime.timedelta(hours=1)
+    files_ext_presigned_download_url_expiration_duration: datetime.timedelta = datetime.timedelta(hours=1)
+
+    # When downloading a file in parallel, how many worker threads to use.
+    files_ext_parallel_download_default_parallelism: int = 10
+
+    # When downloading a file, if the file size is smaller than this threshold,
+    # We'll use a single-threaded download even if the parallel download is enabled.
+    files_ext_parallel_download_min_file_size: int = 50 * 1024 * 1024  # 50 MiB
+
+    # Default chunk size to use when downloading a file in parallel. Not effective for single threaded download.
+    files_ext_parallel_download_default_part_size: int = 10 * 1024 * 1024  # 10 MiB

     # This is not a "wall time" cutoff for the whole upload request,
     # but a maximum time between consecutive data reception events (even 1 byte) from the server
-
+    files_ext_network_transfer_inactivity_timeout_seconds: float = 60

     # Cap on the number of custom retries during incremental uploads:
     # 1) multipart: upload part URL is expired, so new upload URLs must be requested to continue upload
@@ -155,7 +200,10 @@ class Config:
     # retrieved to continue the upload.
     # In these two cases standard SDK retries (which are capped by the `retry_timeout_seconds` option) are not used.
     # Note that retry counter is reset when upload is successfully resumed.
-
+    files_ext_multipart_upload_max_retries = 3
+
+    # Cap on the number of custom retries during parallel downloads.
+    files_ext_parallel_download_max_retries = 3

     def __init__(
         self,
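All of these knobs are plain class attributes on Config, so they can be overridden per instance after construction; only disable_experimental_files_api_client is environment-driven. A sketch of tuning a few of them, with illustrative values rather than recommendations, assuming host and credentials are already available in the environment:

from databricks.sdk.core import Config

cfg = Config()  # resolves host and credentials from the environment

# Use 20 MiB upload parts instead of the 10 MiB default.
cfg.files_ext_multipart_upload_default_part_size = 20 * 1024 * 1024

# Cap whole-file download recovers at 5 instead of the unlimited default.
cfg.files_ext_client_download_max_total_recovers = 5

# The extended Files client itself can still be disabled entirely via
#   export DATABRICKS_DISABLE_EXPERIMENTAL_FILES_API_CLIENT=true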
databricks/sdk/credentials_provider.py
CHANGED
@@ -12,7 +12,7 @@ import sys
 import threading
 import time
 from datetime import datetime
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 import google.auth  # type: ignore
 import requests
@@ -89,7 +89,6 @@ def credentials_strategy(name: str, require: List[str]):
         @functools.wraps(func)
         def wrapper(cfg: "Config") -> Optional[CredentialsProvider]:
             for attr in require:
-                getattr(cfg, attr)
                 if not getattr(cfg, attr):
                     return None
             return func(cfg)
@@ -103,7 +102,12 @@ def credentials_strategy(name: str, require: List[str]):
 def oauth_credentials_strategy(name: str, require: List[str]):
     """Given the function that receives a Config and returns an OauthHeaderFactory,
     create an OauthCredentialsProvider with a given name and required configuration
-    attribute names to be present for this function to be called.
+    attribute names to be present for this function to be called.
+
+    Args:
+        name: The name of the authentication strategy
+        require: List of config attributes that must be present
+    """

     def inner(
         func: Callable[["Config"], OAuthCredentialsProvider],
@@ -356,33 +360,47 @@ def oidc_credentials_provider(cfg, id_token_source: oidc.IdTokenSource) -> Optional[CredentialsProvider]:
     return OAuthCredentialsProvider(refreshed_headers, token)


-
-
+def _oidc_credentials_provider(
+    cfg: "Config", supplier_factory: Callable[[], Any], provider_name: str
+) -> Optional[CredentialsProvider]:
     """
-
-
+    Generic OIDC credentials provider that works with any OIDC token supplier.
+
+    Args:
+        cfg: Databricks configuration
+        supplier_factory: Callable that returns an OIDC token supplier instance
+        provider_name: Human-readable name (e.g., "GitHub OIDC", "Azure DevOps OIDC")

-
-
+    Returns:
+        OAuthCredentialsProvider if successful, None if supplier unavailable or token retrieval fails
     """
-
+    # Try to create the supplier
+    try:
+        supplier = supplier_factory()
+    except Exception as e:
+        logger.debug(f"{provider_name}: {str(e)}")
+        return None

+    # Determine the audience for token exchange
     audience = cfg.token_audience
     if audience is None and cfg.is_account_client:
         audience = cfg.account_id
     if audience is None and not cfg.is_account_client:
         audience = cfg.oidc_endpoints.token_endpoint

-    # Try to get an
+    # Try to get an OIDC token. If no supplier returns a token, we cannot use this authentication mode.
     id_token = supplier.get_oidc_token(audience)
     if not id_token:
+        logger.debug(f"{provider_name}: no token available, skipping authentication method")
         return None

+    logger.info(f"Configured {provider_name} authentication")
+
     def token_source_for(audience: str) -> oauth.TokenSource:
         id_token = supplier.get_oidc_token(audience)
         if not id_token:
             # Should not happen, since we checked it above.
-            raise Exception("Cannot get
+            raise Exception(f"Cannot get {provider_name} token")

         return oauth.ClientCredentials(
             client_id=cfg.client_id,
@@ -408,6 +426,36 @@ def github_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
     return OAuthCredentialsProvider(refreshed_headers, token)


+@oauth_credentials_strategy("github-oidc", ["host", "client_id"])
+def github_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
+    """
+    GitHub OIDC authentication uses a Token Supplier to get a JWT Token and exchanges
+    it for a Databricks Token.
+
+    Supported in GitHub Actions with OIDC service connections.
+    """
+    return _oidc_credentials_provider(
+        cfg=cfg,
+        supplier_factory=lambda: oidc_token_supplier.GitHubOIDCTokenSupplier(),
+        provider_name="GitHub OIDC",
+    )
+
+
+@oauth_credentials_strategy("azure-devops-oidc", ["host", "client_id"])
+def azure_devops_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
+    """
+    Azure DevOps OIDC authentication uses a Token Supplier to get a JWT Token
+    and exchanges it for a Databricks Token.
+
+    Supported in Azure DevOps pipelines with OIDC service connections.
+    """
+    return _oidc_credentials_provider(
+        cfg=cfg,
+        supplier_factory=lambda: oidc_token_supplier.AzureDevOpsOIDCTokenSupplier(),
+        provider_name="Azure DevOps OIDC",
+    )
+
+
 @oauth_credentials_strategy("github-oidc-azure", ["host", "azure_client_id"])
 def github_oidc_azure(cfg: "Config") -> Optional[CredentialsProvider]:
     if "ACTIONS_ID_TOKEN_REQUEST_TOKEN" not in os.environ:
@@ -1019,6 +1067,7 @@ class DefaultCredentials:
             azure_service_principal,
             github_oidc_azure,
             azure_cli,
+            azure_devops_oidc,
             external_browser,
             databricks_cli,
             runtime_native_auth,
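The new azure-devops-oidc strategy is inserted into the DefaultCredentials chain right after azure_cli, so it is tried automatically, and it can also be selected explicitly via auth_type. A sketch under the assumption that the pipeline exposes the OIDC environment read by AzureDevOpsOIDCTokenSupplier (defined in oidc_token_supplier.py, not shown in this hunk); the host and client_id below are placeholders:

from databricks.sdk import WorkspaceClient

# Explicitly pick the new strategy; the name matches the decorator above.
w = WorkspaceClient(
    host="https://example-workspace.azuredatabricks.net",  # placeholder
    client_id="<databricks-oauth-client-id>",              # placeholder
    auth_type="azure-devops-oidc",
)
print(w.current_user.me().user_name)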
databricks/sdk/dbutils.py
CHANGED
@@ -210,7 +210,11 @@ class _JobsUtil:
 class RemoteDbUtils:

     def __init__(self, config: "Config" = None):
-
+        # Create a shallow copy of the config to allow the use of a custom
+        # user-agent while avoiding modifying the original config.
+        self._config = Config() if not config else config.copy()
+        self._config.with_user_agent_extra("dbutils", "remote")
+
         self._client = ApiClient(self._config)
         self._clusters = compute_ext.ClustersExt(self._client)
         self._commands = compute.CommandExecutionAPI(self._client)
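The copy matters because with_user_agent_extra would otherwise tag the caller's own Config with "dbutils/remote". A sketch that pokes at the private _config attribute purely to illustrate the isolation, assuming credentials are configured in the environment:

from databricks.sdk.core import Config
from databricks.sdk.dbutils import RemoteDbUtils

cfg = Config()
dbu = RemoteDbUtils(cfg)

# The user-agent tag lands on the internal copy, not on the caller's config.
assert "dbutils/remote" in dbu._config.user_agent
assert "dbutils/remote" not in cfg.user_agent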
databricks/sdk/errors/parser.py
CHANGED
@@ -31,12 +31,15 @@ _error_customizers = [
 ]


-def _unknown_error(response: requests.Response) -> str:
+def _unknown_error(response: requests.Response, debug_headers: bool = False) -> str:
     """A standard error message that can be shown when an API response cannot be parsed.

     This error message includes a link to the issue tracker for the SDK for users to report the issue to us.
+
+    :param response: The response object from the API request.
+    :param debug_headers: Whether to include headers in the request log. Defaults to False to defensively handle cases where request headers might contain sensitive data (e.g. tokens).
     """
-    request_log = RoundTrip(response, debug_headers=
+    request_log = RoundTrip(response, debug_headers=debug_headers, debug_truncate_bytes=10 * 1024).generate()
     return (
         "This is likely a bug in the Databricks SDK for Python or the underlying "
         "API. Please report this issue with the following debugging information to the SDK issue tracker at "
@@ -56,11 +59,13 @@ class _Parser:
         self,
         extra_error_parsers: List[_ErrorDeserializer] = [],
         extra_error_customizers: List[_ErrorCustomizer] = [],
+        debug_headers: bool = False,
     ):
         self._error_parsers = _error_deserializers + (extra_error_parsers if extra_error_parsers is not None else [])
         self._error_customizers = _error_customizers + (
             extra_error_customizers if extra_error_customizers is not None else []
         )
+        self._debug_headers = debug_headers

     def get_api_error(self, response: requests.Response) -> Optional[DatabricksError]:
         """
@@ -84,7 +89,7 @@ class _Parser:
             )
             return _error_mapper(
                 response,
-                {"message": "unable to parse response. " + _unknown_error(response)},
+                {"message": "unable to parse response. " + _unknown_error(response, self._debug_headers)},
             )

         # Private link failures happen via a redirect to the login page. From a requests-perspective, the request