databricks-sdk 0.17.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff compares two publicly released versions of the package as published to their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of databricks-sdk has been flagged as potentially problematic.
- databricks/sdk/__init__.py +41 -5
- databricks/sdk/azure.py +17 -7
- databricks/sdk/clock.py +49 -0
- databricks/sdk/config.py +459 -0
- databricks/sdk/core.py +7 -1026
- databricks/sdk/credentials_provider.py +628 -0
- databricks/sdk/environments.py +72 -0
- databricks/sdk/errors/__init__.py +1 -1
- databricks/sdk/errors/mapper.py +5 -5
- databricks/sdk/mixins/workspace.py +3 -3
- databricks/sdk/oauth.py +2 -1
- databricks/sdk/retries.py +9 -5
- databricks/sdk/service/_internal.py +1 -1
- databricks/sdk/service/catalog.py +946 -82
- databricks/sdk/service/compute.py +106 -41
- databricks/sdk/service/files.py +145 -31
- databricks/sdk/service/iam.py +44 -40
- databricks/sdk/service/jobs.py +199 -20
- databricks/sdk/service/ml.py +33 -42
- databricks/sdk/service/oauth2.py +3 -4
- databricks/sdk/service/pipelines.py +51 -31
- databricks/sdk/service/serving.py +1 -2
- databricks/sdk/service/settings.py +377 -72
- databricks/sdk/service/sharing.py +3 -4
- databricks/sdk/service/sql.py +27 -19
- databricks/sdk/service/vectorsearch.py +13 -17
- databricks/sdk/service/workspace.py +20 -11
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/METADATA +4 -4
- databricks_sdk-0.19.0.dist-info/RECORD +53 -0
- databricks_sdk-0.17.0.dist-info/RECORD +0 -49
- databricks/sdk/errors/{mapping.py → platform.py} +0 -0
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/LICENSE +0 -0
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/NOTICE +0 -0
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/WHEEL +0 -0
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/top_level.txt +0 -0
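
The headline change in this diff is a refactoring of `databricks/sdk/core.py`: the `Config`/`ConfigAttribute` machinery moves to the new `databricks/sdk/config.py`, the credential providers move to the new `databricks/sdk/credentials_provider.py`, and `environments.py` and `clock.py` are introduced, while `core.py` shrinks to re-exports plus `ApiClient`. Because `core.py` star-imports the new modules ("to preserve backwards compatibility", per the comment in the diff below), imports that worked against 0.17.0 should keep working against 0.19.0. A minimal sketch of that compatibility check, assuming databricks-sdk 0.19.0 is installed (the fake host and token are fine here because the `pat` provider only builds static headers and makes no network calls):

```python
# Symbols that used to live in databricks.sdk.core remain importable from it,
# thanks to the star re-exports visible in the core.py diff below.
from databricks.sdk.core import Config, DatabricksError  # still in __all__
from databricks.sdk.core import CredentialsProvider      # now defined in credentials_provider.py

cfg = Config(host='https://example.cloud.databricks.com',  # hypothetical workspace URL
             token='dapi-not-a-real-token')                # fake PAT; no network call is made
print(cfg.auth_type)  # 'pat' — resolved by the same DefaultCredentials chain as before
print(cfg.is_aws)     # True — host-based cloud detection is unchanged
```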
databricks/sdk/core.py
CHANGED
```diff
@@ -1,1042 +1,22 @@
-import abc
-import base64
-import configparser
-import copy
-import functools
-import io
-import json
-import logging
-import os
-import pathlib
-import platform
 import re
-import subprocess
-import sys
 import urllib.parse
-from datetime import datetime, timedelta
+from datetime import timedelta
 from json import JSONDecodeError
 from types import TracebackType
-from typing import (Any, BinaryIO, Callable, Dict, Iterable, Iterator, List,
-                    Optional, Type, Union)
+from typing import Any, BinaryIO, Iterator, Type
 
-import google.auth
-import requests
-from google.auth import impersonated_credentials
-from google.auth.transport.requests import Request
-from google.oauth2 import service_account
 from requests.adapters import HTTPAdapter
 
-from .azure import (ARM_DATABRICKS_RESOURCE_ID, ENVIRONMENTS, AzureEnvironment,
-                    add_sp_management_token, add_workspace_id_header)
+from .config import *
+# To preserve backwards compatibility (as these definitions were previously in this module)
+from .credentials_provider import *
 from .errors import DatabricksError, error_mapper
-from .oauth import (ClientCredentials, OAuthClient, OidcEndpoints, Refreshable,
-                    Token, TokenCache, TokenSource)
 from .retries import retried
-from .version import __version__
 
 __all__ = ['Config', 'DatabricksError']
 
 logger = logging.getLogger('databricks.sdk')
 
-HeaderFactory = Callable[[], Dict[str, str]]
-
-GcpScopes = ["https://www.googleapis.com/auth/cloud-platform", "https://www.googleapis.com/auth/compute"]
-
-
-class CredentialsProvider(abc.ABC):
-    """ CredentialsProvider is the protocol (call-side interface)
-    for authenticating requests to Databricks REST APIs"""
-
-    @abc.abstractmethod
-    def auth_type(self) -> str:
-        ...
-
-    @abc.abstractmethod
-    def __call__(self, cfg: 'Config') -> HeaderFactory:
-        ...
-
-
-def credentials_provider(name: str, require: List[str]):
-    """ Given the function that receives a Config and returns RequestVisitor,
-    create CredentialsProvider with a given name and required configuration
-    attribute names to be present for this function to be called. """
-
-    def inner(func: Callable[['Config'], HeaderFactory]) -> CredentialsProvider:
-
-        @functools.wraps(func)
-        def wrapper(cfg: 'Config') -> Optional[HeaderFactory]:
-            for attr in require:
-                if not getattr(cfg, attr):
-                    return None
-            return func(cfg)
-
-        wrapper.auth_type = lambda: name
-        return wrapper
-
-    return inner
-
-
-@credentials_provider('basic', ['host', 'username', 'password'])
-def basic_auth(cfg: 'Config') -> HeaderFactory:
-    """ Given username and password, add base64-encoded Basic credentials """
-    encoded = base64.b64encode(f'{cfg.username}:{cfg.password}'.encode()).decode()
-    static_credentials = {'Authorization': f'Basic {encoded}'}
-
-    def inner() -> Dict[str, str]:
-        return static_credentials
-
-    return inner
-
-
-@credentials_provider('pat', ['host', 'token'])
-def pat_auth(cfg: 'Config') -> HeaderFactory:
-    """ Adds Databricks Personal Access Token to every request """
-    static_credentials = {'Authorization': f'Bearer {cfg.token}'}
-
-    def inner() -> Dict[str, str]:
-        return static_credentials
-
-    return inner
-
-
-@credentials_provider('runtime', [])
-def runtime_native_auth(cfg: 'Config') -> Optional[HeaderFactory]:
-    if 'DATABRICKS_RUNTIME_VERSION' not in os.environ:
-        return None
-
-    # This import MUST be after the "DATABRICKS_RUNTIME_VERSION" check
-    # above, so that we are not throwing import errors when not in
-    # runtime and no config variables are set.
-    from databricks.sdk.runtime import (init_runtime_legacy_auth,
-                                        init_runtime_native_auth,
-                                        init_runtime_repl_auth)
-    for init in [init_runtime_native_auth, init_runtime_repl_auth, init_runtime_legacy_auth]:
-        if init is None:
-            continue
-        host, inner = init()
-        if host is None:
-            logger.debug(f'[{init.__name__}] no host detected')
-            continue
-        cfg.host = host
-        logger.debug(f'[{init.__name__}] runtime native auth configured')
-        return inner
-    return None
-
-
-@credentials_provider('oauth-m2m', ['is_aws', 'host', 'client_id', 'client_secret'])
-def oauth_service_principal(cfg: 'Config') -> Optional[HeaderFactory]:
-    """ Adds refreshed Databricks machine-to-machine OAuth Bearer token to every request,
-    if /oidc/.well-known/oauth-authorization-server is available on the given host. """
-    # TODO: Azure returns 404 for UC workspace after redirecting to
-    # https://login.microsoftonline.com/{cfg.azure_tenant_id}/.well-known/oauth-authorization-server
-    oidc = cfg.oidc_endpoints
-    if oidc is None:
-        return None
-    token_source = ClientCredentials(client_id=cfg.client_id,
-                                     client_secret=cfg.client_secret,
-                                     token_url=oidc.token_endpoint,
-                                     scopes=["all-apis"],
-                                     use_header=True)
-
-    def inner() -> Dict[str, str]:
-        token = token_source.token()
-        return {'Authorization': f'{token.token_type} {token.access_token}'}
-
-    return inner
-
-
-@credentials_provider('external-browser', ['host', 'auth_type'])
-def external_browser(cfg: 'Config') -> Optional[HeaderFactory]:
-    if cfg.auth_type != 'external-browser':
-        return None
-    if cfg.client_id:
-        client_id = cfg.client_id
-    elif cfg.is_aws:
-        client_id = 'databricks-cli'
-    elif cfg.is_azure:
-        # Use Azure AD app for cases when Azure CLI is not available on the machine.
-        # App has to be registered as Single-page multi-tenant to support PKCE
-        # TODO: temporary app ID, change it later.
-        client_id = '6128a518-99a9-425b-8333-4cc94f04cacd'
-    else:
-        raise ValueError(f'local browser SSO is not supported')
-    oauth_client = OAuthClient(host=cfg.host,
-                               client_id=client_id,
-                               redirect_url='http://localhost:8020',
-                               client_secret=cfg.client_secret)
-
-    # Load cached credentials from disk if they exist.
-    # Note that these are local to the Python SDK and not reused by other SDKs.
-    token_cache = TokenCache(oauth_client)
-    credentials = token_cache.load()
-    if credentials:
-        # Force a refresh in case the loaded credentials are expired.
-        credentials.token()
-    else:
-        consent = oauth_client.initiate_consent()
-        if not consent:
-            return None
-        credentials = consent.launch_external_browser()
-        token_cache.save(credentials)
-    return credentials(cfg)
-
-
-def _ensure_host_present(cfg: 'Config', token_source_for: Callable[[str], TokenSource]):
-    """ Resolves Azure Databricks workspace URL from ARM Resource ID """
-    if cfg.host:
-        return
-    if not cfg.azure_workspace_resource_id:
-        return
-    arm = cfg.arm_environment.resource_manager_endpoint
-    token = token_source_for(arm).token()
-    resp = requests.get(f"{arm}{cfg.azure_workspace_resource_id}?api-version=2018-04-01",
-                        headers={"Authorization": f"Bearer {token.access_token}"})
-    if not resp.ok:
-        raise ValueError(f"Cannot resolve Azure Databricks workspace: {resp.content}")
-    cfg.host = f"https://{resp.json()['properties']['workspaceUrl']}"
-
-
-@credentials_provider('azure-client-secret',
-                      ['is_azure', 'azure_client_id', 'azure_client_secret', 'azure_tenant_id'])
-def azure_service_principal(cfg: 'Config') -> HeaderFactory:
-    """ Adds refreshed Azure Active Directory (AAD) Service Principal OAuth tokens
-    to every request, while automatically resolving different Azure environment endpoints. """
-
-    def token_source_for(resource: str) -> TokenSource:
-        aad_endpoint = cfg.arm_environment.active_directory_endpoint
-        return ClientCredentials(client_id=cfg.azure_client_id,
-                                 client_secret=cfg.azure_client_secret,
-                                 token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
-                                 endpoint_params={"resource": resource},
-                                 use_params=True)
-
-    _ensure_host_present(cfg, token_source_for)
-    logger.info("Configured AAD token for Service Principal (%s)", cfg.azure_client_id)
-    inner = token_source_for(cfg.effective_azure_login_app_id)
-    cloud = token_source_for(cfg.arm_environment.service_management_endpoint)
-
-    def refreshed_headers() -> Dict[str, str]:
-        headers = {'Authorization': f"Bearer {inner.token().access_token}", }
-        add_workspace_id_header(cfg, headers)
-        add_sp_management_token(cloud, headers)
-        return headers
-
-    return refreshed_headers
-
-
-@credentials_provider('github-oidc-azure', ['host', 'azure_client_id'])
-def github_oidc_azure(cfg: 'Config') -> Optional[HeaderFactory]:
-    if 'ACTIONS_ID_TOKEN_REQUEST_TOKEN' not in os.environ:
-        # not in GitHub actions
-        return None
-
-    # Client ID is the minimal thing we need, as otherwise we get AADSTS700016: Application with
-    # identifier 'https://token.actions.githubusercontent.com' was not found in the directory '...'.
-    if not cfg.is_azure:
-        return None
-
-    # See https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-cloud-providers
-    headers = {'Authorization': f"Bearer {os.environ['ACTIONS_ID_TOKEN_REQUEST_TOKEN']}"}
-    endpoint = f"{os.environ['ACTIONS_ID_TOKEN_REQUEST_URL']}&audience=api://AzureADTokenExchange"
-    response = requests.get(endpoint, headers=headers)
-    if not response.ok:
-        return None
-
-    # get the ID Token with aud=api://AzureADTokenExchange sub=repo:org/repo:environment:name
-    response_json = response.json()
-    if 'value' not in response_json:
-        return None
-
-    logger.info("Configured AAD token for GitHub Actions OIDC (%s)", cfg.azure_client_id)
-    params = {
-        'client_assertion_type': 'urn:ietf:params:oauth:client-assertion-type:jwt-bearer',
-        'resource': cfg.effective_azure_login_app_id,
-        'client_assertion': response_json['value'],
-    }
-    aad_endpoint = cfg.arm_environment.active_directory_endpoint
-    if not cfg.azure_tenant_id:
-        # detect Azure AD Tenant ID if it's not specified directly
-        token_endpoint = cfg.oidc_endpoints.token_endpoint
-        cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0]
-    inner = ClientCredentials(client_id=cfg.azure_client_id,
-                              client_secret="", # we have no (rotatable) secrets in OIDC flow
-                              token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
-                              endpoint_params=params,
-                              use_params=True)
-
-    def refreshed_headers() -> Dict[str, str]:
-        token = inner.token()
-        return {'Authorization': f'{token.token_type} {token.access_token}'}
-
-    return refreshed_headers
-
-
-@credentials_provider('google-credentials', ['host', 'google_credentials'])
-def google_credentials(cfg: 'Config') -> Optional[HeaderFactory]:
-    if not cfg.is_gcp:
-        return None
-    # Reads credentials as JSON. Credentials can be either a path to JSON file, or actual JSON string.
-    # Obtain the id token by providing the json file path and target audience.
-    if (os.path.isfile(cfg.google_credentials)):
-        with io.open(cfg.google_credentials, "r", encoding="utf-8") as json_file:
-            account_info = json.load(json_file)
-    else:
-        # If the file doesn't exist, assume that the config is the actual JSON content.
-        account_info = json.loads(cfg.google_credentials)
-
-    credentials = service_account.IDTokenCredentials.from_service_account_info(info=account_info,
-                                                                               target_audience=cfg.host)
-
-    request = Request()
-
-    gcp_credentials = service_account.Credentials.from_service_account_info(info=account_info,
-                                                                            scopes=GcpScopes)
-
-    def refreshed_headers() -> Dict[str, str]:
-        credentials.refresh(request)
-        headers = {'Authorization': f'Bearer {credentials.token}'}
-        if cfg.is_account_client:
-            gcp_credentials.refresh(request)
-            headers["X-Databricks-GCP-SA-Access-Token"] = gcp_credentials.token
-        return headers
-
-    return refreshed_headers
-
-
-@credentials_provider('google-id', ['host', 'google_service_account'])
-def google_id(cfg: 'Config') -> Optional[HeaderFactory]:
-    if not cfg.is_gcp:
-        return None
-    credentials, _project_id = google.auth.default()
-
-    # Create the impersonated credential.
-    target_credentials = impersonated_credentials.Credentials(source_credentials=credentials,
-                                                              target_principal=cfg.google_service_account,
-                                                              target_scopes=[])
-
-    # Set the impersonated credential, target audience and token options.
-    id_creds = impersonated_credentials.IDTokenCredentials(target_credentials,
-                                                           target_audience=cfg.host,
-                                                           include_email=True)
-
-    gcp_impersonated_credentials = impersonated_credentials.Credentials(
-        source_credentials=credentials, target_principal=cfg.google_service_account, target_scopes=GcpScopes)
-
-    request = Request()
-
-    def refreshed_headers() -> Dict[str, str]:
-        id_creds.refresh(request)
-        headers = {'Authorization': f'Bearer {id_creds.token}'}
-        if cfg.is_account_client:
-            gcp_impersonated_credentials.refresh(request)
-            headers["X-Databricks-GCP-SA-Access-Token"] = gcp_impersonated_credentials.token
-        return headers
-
-    return refreshed_headers
-
-
-class CliTokenSource(Refreshable):
-
-    def __init__(self, cmd: List[str], token_type_field: str, access_token_field: str, expiry_field: str):
-        super().__init__()
-        self._cmd = cmd
-        self._token_type_field = token_type_field
-        self._access_token_field = access_token_field
-        self._expiry_field = expiry_field
-
-    @staticmethod
-    def _parse_expiry(expiry: str) -> datetime:
-        expiry = expiry.rstrip("Z").split(".")[0]
-        for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"):
-            try:
-                return datetime.strptime(expiry, fmt)
-            except ValueError as e:
-                last_e = e
-        if last_e:
-            raise last_e
-
-    def refresh(self) -> Token:
-        try:
-            is_windows = sys.platform.startswith('win')
-            # windows requires shell=True to be able to execute 'az login' or other commands
-            # cannot use shell=True all the time, as it breaks macOS
-            out = subprocess.run(self._cmd, capture_output=True, check=True, shell=is_windows)
-            it = json.loads(out.stdout.decode())
-            expires_on = self._parse_expiry(it[self._expiry_field])
-            return Token(access_token=it[self._access_token_field],
-                         token_type=it[self._token_type_field],
-                         expiry=expires_on)
-        except ValueError as e:
-            raise ValueError(f"cannot unmarshal CLI result: {e}")
-        except subprocess.CalledProcessError as e:
-            stdout = e.stdout.decode().strip()
-            stderr = e.stderr.decode().strip()
-            message = stdout or stderr
-            raise IOError(f'cannot get access token: {message}') from e
-
-
-class AzureCliTokenSource(CliTokenSource):
-    """ Obtain the token granted by `az login` CLI command """
-
-    def __init__(self, resource: str, subscription: str = ""):
-        cmd = ["az", "account", "get-access-token", "--resource", resource, "--output", "json"]
-        if subscription != "":
-            cmd.append("--subscription")
-            cmd.append(subscription)
-        super().__init__(cmd=cmd,
-                         token_type_field='tokenType',
-                         access_token_field='accessToken',
-                         expiry_field='expiresOn')
-
-    def is_human_user(self) -> bool:
-        """The UPN claim is the username of the user, but not the Service Principal.
-
-        Azure CLI can be authenticated by both human users (`az login`) and service principals. In case of service
-        principals, it can be either OIDC from GitHub or login with a password:
-
-        ~ $ az login --service-principal --user $clientID --password $clientSecret --tenant $tenantID
-
-        Human users get more claims:
-        - 'amr' - how the subject of the token was authenticated
-        - 'name', 'family_name', 'given_name' - human-readable values that identifies the subject of the token
-        - 'scp' with `user_impersonation` value, that shows the set of scopes exposed by your application for which
-          the client application has requested (and received) consent
-        - 'unique_name' - a human-readable value that identifies the subject of the token. This value is not
-          guaranteed to be unique within a tenant and should be used only for display purposes.
-        - 'upn' - The username of the user.
-        """
-        return 'upn' in self.token().jwt_claims()
-
-    @staticmethod
-    def for_resource(cfg: 'Config', resource: str) -> 'AzureCliTokenSource':
-        subscription = AzureCliTokenSource.get_subscription(cfg)
-        if subscription != "":
-            token_source = AzureCliTokenSource(resource, subscription)
-            try:
-                # This will fail if the user has access to the workspace, but not to the subscription
-                # itself.
-                # In such case, we fall back to not using the subscription.
-                token_source.token()
-                return token_source
-            except OSError:
-                logger.warning("Failed to get token for subscription. Using resource only token.")
-
-        token_source = AzureCliTokenSource(resource)
-        token_source.token()
-        return token_source
-
-    @staticmethod
-    def get_subscription(cfg: 'Config') -> str:
-        resource = cfg.azure_workspace_resource_id
-        if resource is None or resource == "":
-            return ""
-        components = resource.split('/')
-        if len(components) < 3:
-            logger.warning("Invalid azure workspace resource ID")
-            return ""
-        return components[2]
-
-
-@credentials_provider('azure-cli', ['is_azure'])
-def azure_cli(cfg: 'Config') -> Optional[HeaderFactory]:
-    """ Adds refreshed OAuth token granted by `az login` command to every request. """
-    token_source = None
-    mgmt_token_source = None
-    try:
-        token_source = AzureCliTokenSource.for_resource(cfg, cfg.effective_azure_login_app_id)
-    except FileNotFoundError:
-        doc = 'https://docs.microsoft.com/en-us/cli/azure/?view=azure-cli-latest'
-        logger.debug(f'Most likely Azure CLI is not installed. See {doc} for details')
-        return None
-    if not token_source.is_human_user():
-        try:
-            management_endpoint = cfg.arm_environment.service_management_endpoint
-            mgmt_token_source = AzureCliTokenSource.for_resource(cfg, management_endpoint)
-        except Exception as e:
-            logger.debug(f'Not including service management token in headers', exc_info=e)
-            mgmt_token_source = None
-
-    _ensure_host_present(cfg, lambda resource: AzureCliTokenSource.for_resource(cfg, resource))
-    logger.info("Using Azure CLI authentication with AAD tokens")
-    if not cfg.is_account_client and AzureCliTokenSource.get_subscription(cfg) == "":
-        logger.warning(
-            "azure_workspace_resource_id field not provided. "
-            "It is recommended to specify this field in the Databricks configuration to avoid authentication errors."
-        )
-
-    def inner() -> Dict[str, str]:
-        token = token_source.token()
-        headers = {'Authorization': f'{token.token_type} {token.access_token}'}
-        add_workspace_id_header(cfg, headers)
-        if mgmt_token_source:
-            add_sp_management_token(mgmt_token_source, headers)
-        return headers
-
-    return inner
-
-
-class DatabricksCliTokenSource(CliTokenSource):
-    """ Obtain the token granted by `databricks auth login` CLI command """
-
-    def __init__(self, cfg: 'Config'):
-        args = ['auth', 'token', '--host', cfg.host]
-        if cfg.is_account_client:
-            args += ['--account-id', cfg.account_id]
-
-        cli_path = cfg.databricks_cli_path
-        if not cli_path:
-            cli_path = 'databricks'
-
-        # If the path is unqualified, look it up in PATH.
-        if cli_path.count("/") == 0:
-            cli_path = self.__class__._find_executable(cli_path)
-
-        super().__init__(cmd=[cli_path, *args],
-                         token_type_field='token_type',
-                         access_token_field='access_token',
-                         expiry_field='expiry')
-
-    @staticmethod
-    def _find_executable(name) -> str:
-        err = FileNotFoundError("Most likely the Databricks CLI is not installed")
-        for dir in os.getenv("PATH", default="").split(os.path.pathsep):
-            path = pathlib.Path(dir).joinpath(name).resolve()
-            if not path.is_file():
-                continue
-
-            # The new Databricks CLI is a single binary with size > 1MB.
-            # We use the size as a signal to determine which Databricks CLI is installed.
-            stat = path.stat()
-            if stat.st_size < (1024 * 1024):
-                err = FileNotFoundError("Databricks CLI version <0.100.0 detected")
-                continue
-
-            return str(path)
-
-        raise err
-
-
-@credentials_provider('databricks-cli', ['host', 'is_aws'])
-def databricks_cli(cfg: 'Config') -> Optional[HeaderFactory]:
-    try:
-        token_source = DatabricksCliTokenSource(cfg)
-    except FileNotFoundError as e:
-        logger.debug(e)
-        return None
-
-    try:
-        token_source.token()
-    except IOError as e:
-        if 'databricks OAuth is not' in str(e):
-            logger.debug(f'OAuth not configured or not available: {e}')
-            return None
-        raise e
-
-    logger.info("Using Databricks CLI authentication")
-
-    def inner() -> Dict[str, str]:
-        token = token_source.token()
-        return {'Authorization': f'{token.token_type} {token.access_token}'}
-
-    return inner
-
-
-class MetadataServiceTokenSource(Refreshable):
-    """ Obtain the token granted by Databricks Metadata Service """
-    METADATA_SERVICE_VERSION = "1"
-    METADATA_SERVICE_VERSION_HEADER = "X-Databricks-Metadata-Version"
-    METADATA_SERVICE_HOST_HEADER = "X-Databricks-Host"
-    _metadata_service_timeout = 10 # seconds
-
-    def __init__(self, cfg: 'Config'):
-        super().__init__()
-        self.url = cfg.metadata_service_url
-        self.host = cfg.host
-
-    def refresh(self) -> Token:
-        resp = requests.get(self.url,
-                            timeout=self._metadata_service_timeout,
-                            headers={
-                                self.METADATA_SERVICE_VERSION_HEADER: self.METADATA_SERVICE_VERSION,
-                                self.METADATA_SERVICE_HOST_HEADER: self.host
-                            })
-        json_resp: dict[str, Union[str, float]] = resp.json()
-        access_token = json_resp.get("access_token", None)
-        if access_token is None:
-            raise ValueError("Metadata Service returned empty token")
-        token_type = json_resp.get("token_type", None)
-        if token_type is None:
-            raise ValueError("Metadata Service returned empty token type")
-        if json_resp["expires_on"] in ["", None]:
-            raise ValueError("Metadata Service returned invalid expiry")
-        try:
-            expiry = datetime.fromtimestamp(json_resp["expires_on"])
-        except:
-            raise ValueError("Metadata Service returned invalid expiry")
-
-        return Token(access_token=access_token, token_type=token_type, expiry=expiry)
-
-
-@credentials_provider('metadata-service', ['host', 'metadata_service_url'])
-def metadata_service(cfg: 'Config') -> Optional[HeaderFactory]:
-    """ Adds refreshed token granted by Databricks Metadata Service to every request. """
-
-    token_source = MetadataServiceTokenSource(cfg)
-    token_source.token()
-    logger.info("Using Databricks Metadata Service authentication")
-
-    def inner() -> Dict[str, str]:
-        token = token_source.token()
-        return {'Authorization': f'{token.token_type} {token.access_token}'}
-
-    return inner
-
-
-class DefaultCredentials:
-    """ Select the first applicable credential provider from the chain """
-
-    def __init__(self) -> None:
-        self._auth_type = 'default'
-
-    def auth_type(self) -> str:
-        return self._auth_type
-
-    def __call__(self, cfg: 'Config') -> HeaderFactory:
-        auth_providers = [
-            pat_auth, basic_auth, metadata_service, oauth_service_principal, azure_service_principal,
-            github_oidc_azure, azure_cli, external_browser, databricks_cli, runtime_native_auth,
-            google_credentials, google_id
-        ]
-        for provider in auth_providers:
-            auth_type = provider.auth_type()
-            if cfg.auth_type and auth_type != cfg.auth_type:
-                # ignore other auth types if one is explicitly enforced
-                logger.debug(f"Ignoring {auth_type} auth, because {cfg.auth_type} is preferred")
-                continue
-            logger.debug(f'Attempting to configure auth: {auth_type}')
-            try:
-                header_factory = provider(cfg)
-                if not header_factory:
-                    continue
-                self._auth_type = auth_type
-                return header_factory
-            except Exception as e:
-                raise ValueError(f'{auth_type}: {e}') from e
-        auth_flow_url = "https://docs.databricks.com/en/dev-tools/auth.html#databricks-client-unified-authentication"
-        raise ValueError(
-            f'cannot configure default credentials, please check {auth_flow_url} to configure credentials for your preferred authentication method.'
-        )
-
-
-class ConfigAttribute:
-    """ Configuration attribute metadata and descriptor protocols. """
-
-    # name and transform are discovered from Config.__new__
-    name: str = None
-    transform: type = str
-
-    def __init__(self, env: str = None, auth: str = None, sensitive: bool = False):
-        self.env = env
-        self.auth = auth
-        self.sensitive = sensitive
-
-    def __get__(self, cfg: 'Config', owner):
-        if not cfg:
-            return None
-        return cfg._inner.get(self.name, None)
-
-    def __set__(self, cfg: 'Config', value: any):
-        cfg._inner[self.name] = self.transform(value)
-
-    def __repr__(self) -> str:
-        return f"<ConfigAttribute '{self.name}' {self.transform.__name__}>"
-
-
-class Config:
-    host: str = ConfigAttribute(env='DATABRICKS_HOST')
-    account_id: str = ConfigAttribute(env='DATABRICKS_ACCOUNT_ID')
-    token: str = ConfigAttribute(env='DATABRICKS_TOKEN', auth='pat', sensitive=True)
-    username: str = ConfigAttribute(env='DATABRICKS_USERNAME', auth='basic')
-    password: str = ConfigAttribute(env='DATABRICKS_PASSWORD', auth='basic', sensitive=True)
-    client_id: str = ConfigAttribute(env='DATABRICKS_CLIENT_ID', auth='oauth')
-    client_secret: str = ConfigAttribute(env='DATABRICKS_CLIENT_SECRET', auth='oauth', sensitive=True)
-    profile: str = ConfigAttribute(env='DATABRICKS_CONFIG_PROFILE')
-    config_file: str = ConfigAttribute(env='DATABRICKS_CONFIG_FILE')
-    google_service_account: str = ConfigAttribute(env='DATABRICKS_GOOGLE_SERVICE_ACCOUNT', auth='google')
-    google_credentials: str = ConfigAttribute(env='GOOGLE_CREDENTIALS', auth='google', sensitive=True)
-    azure_workspace_resource_id: str = ConfigAttribute(env='DATABRICKS_AZURE_RESOURCE_ID', auth='azure')
-    azure_use_msi: bool = ConfigAttribute(env='ARM_USE_MSI', auth='azure')
-    azure_client_secret: str = ConfigAttribute(env='ARM_CLIENT_SECRET', auth='azure', sensitive=True)
-    azure_client_id: str = ConfigAttribute(env='ARM_CLIENT_ID', auth='azure')
-    azure_tenant_id: str = ConfigAttribute(env='ARM_TENANT_ID', auth='azure')
-    azure_environment: str = ConfigAttribute(env='ARM_ENVIRONMENT')
-    azure_login_app_id: str = ConfigAttribute(env='DATABRICKS_AZURE_LOGIN_APP_ID', auth='azure')
-    databricks_cli_path: str = ConfigAttribute(env='DATABRICKS_CLI_PATH')
-    auth_type: str = ConfigAttribute(env='DATABRICKS_AUTH_TYPE')
-    cluster_id: str = ConfigAttribute(env='DATABRICKS_CLUSTER_ID')
-    warehouse_id: str = ConfigAttribute(env='DATABRICKS_WAREHOUSE_ID')
-    skip_verify: bool = ConfigAttribute()
-    http_timeout_seconds: float = ConfigAttribute()
-    debug_truncate_bytes: int = ConfigAttribute(env='DATABRICKS_DEBUG_TRUNCATE_BYTES')
-    debug_headers: bool = ConfigAttribute(env='DATABRICKS_DEBUG_HEADERS')
-    rate_limit: int = ConfigAttribute(env='DATABRICKS_RATE_LIMIT')
-    retry_timeout_seconds: int = ConfigAttribute()
-    metadata_service_url = ConfigAttribute(env='DATABRICKS_METADATA_SERVICE_URL',
-                                           auth='metadata-service',
-                                           sensitive=True)
-    max_connection_pools: int = ConfigAttribute()
-    max_connections_per_pool: int = ConfigAttribute()
-
-    def __init__(self,
-                 *,
-                 credentials_provider: CredentialsProvider = None,
-                 product="unknown",
-                 product_version="0.0.0",
-                 **kwargs):
-        self._inner = {}
-        self._user_agent_other_info = []
-        self._credentials_provider = credentials_provider if credentials_provider else DefaultCredentials()
-        try:
-            self._set_inner_config(kwargs)
-            self._load_from_env()
-            self._known_file_config_loader()
-            self._fix_host_if_needed()
-            self._validate()
-            self._init_auth()
-            self._product = product
-            self._product_version = product_version
-        except ValueError as e:
-            message = self.wrap_debug_info(str(e))
-            raise ValueError(message) from e
-
-    def wrap_debug_info(self, message: str) -> str:
-        debug_string = self.debug_string()
-        if debug_string:
-            message = f'{message.rstrip(".")}. {debug_string}'
-        return message
-
-    @staticmethod
-    def parse_dsn(dsn: str) -> 'Config':
-        uri = urllib.parse.urlparse(dsn)
-        if uri.scheme != 'databricks':
-            raise ValueError(f'Expected databricks:// scheme, got {uri.scheme}://')
-        kwargs = {'host': f'https://{uri.hostname}'}
-        if uri.username:
-            kwargs['username'] = uri.username
-        if uri.password:
-            kwargs['password'] = uri.password
-        query = dict(urllib.parse.parse_qsl(uri.query))
-        for attr in Config.attributes():
-            if attr.name not in query:
-                continue
-            kwargs[attr.name] = query[attr.name]
-        return Config(**kwargs)
-
-    def authenticate(self) -> Dict[str, str]:
-        """ Returns a list of fresh authentication headers """
-        return self._header_factory()
-
-    def as_dict(self) -> dict:
-        return self._inner
-
-    @property
-    def is_azure(self) -> bool:
-        has_resource_id = self.azure_workspace_resource_id is not None
-        has_host = self.host is not None
-        is_public_cloud = has_host and ".azuredatabricks.net" in self.host
-        is_china_cloud = has_host and ".databricks.azure.cn" in self.host
-        is_gov_cloud = has_host and ".databricks.azure.us" in self.host
-        is_valid_cloud = is_public_cloud or is_china_cloud or is_gov_cloud
-        return has_resource_id or (has_host and is_valid_cloud)
-
-    @property
-    def is_gcp(self) -> bool:
-        return self.host and ".gcp.databricks.com" in self.host
-
-    @property
-    def is_aws(self) -> bool:
-        return not self.is_azure and not self.is_gcp
-
-    @property
-    def is_account_client(self) -> bool:
-        if not self.host:
-            return False
-        return self.host.startswith("https://accounts.") or self.host.startswith("https://accounts-dod.")
-
-    @property
-    def arm_environment(self) -> AzureEnvironment:
-        env = self.azure_environment if self.azure_environment else "PUBLIC"
-        try:
-            return ENVIRONMENTS[env]
-        except KeyError:
-            raise ValueError(f"Cannot find Azure {env} Environment")
-
-    @property
-    def effective_azure_login_app_id(self):
-        app_id = self.azure_login_app_id
-        if app_id:
-            return app_id
-        return ARM_DATABRICKS_RESOURCE_ID
-
-    @property
-    def hostname(self) -> str:
-        url = urllib.parse.urlparse(self.host)
-        return url.netloc
-
-    @property
-    def is_any_auth_configured(self) -> bool:
-        for attr in Config.attributes():
-            if not attr.auth:
-                continue
-            value = self._inner.get(attr.name, None)
-            if value:
-                return True
-        return False
-
-    @property
-    def user_agent(self):
-        """ Returns User-Agent header used by this SDK """
-        py_version = platform.python_version()
-        os_name = platform.uname().system.lower()
-
-        ua = [
-            f"{self._product}/{self._product_version}", f"databricks-sdk-py/{__version__}",
-            f"python/{py_version}", f"os/{os_name}", f"auth/{self.auth_type}",
-        ]
-        if len(self._user_agent_other_info) > 0:
-            ua.append(' '.join(self._user_agent_other_info))
-        if len(self._upstream_user_agent) > 0:
-            ua.append(self._upstream_user_agent)
-        if 'DATABRICKS_RUNTIME_VERSION' in os.environ:
-            runtime_version = os.environ['DATABRICKS_RUNTIME_VERSION']
-            if runtime_version != '':
-                runtime_version = self._sanitize_header_value(runtime_version)
-                ua.append(f'runtime/{runtime_version}')
-
-        return ' '.join(ua)
-
-    @staticmethod
-    def _sanitize_header_value(value: str) -> str:
-        value = value.replace(' ', '-')
-        value = value.replace('/', '-')
-        return value
-
-    @property
-    def _upstream_user_agent(self) -> str:
-        product = os.environ.get('DATABRICKS_SDK_UPSTREAM', None)
-        product_version = os.environ.get('DATABRICKS_SDK_UPSTREAM_VERSION', None)
-        if product is not None and product_version is not None:
-            return f"upstream/{product} upstream-version/{product_version}"
-        return ""
-
-    def with_user_agent_extra(self, key: str, value: str) -> 'Config':
-        self._user_agent_other_info.append(f"{key}/{value}")
-        return self
-
-    @property
-    def oidc_endpoints(self) -> Optional[OidcEndpoints]:
-        self._fix_host_if_needed()
-        if not self.host:
-            return None
-        if self.is_azure:
-            # Retrieve authorize endpoint to retrieve token endpoint after
-            res = requests.get(f'{self.host}/oidc/oauth2/v2.0/authorize', allow_redirects=False)
-            real_auth_url = res.headers.get('location')
-            if not real_auth_url:
-                return None
-            return OidcEndpoints(authorization_endpoint=real_auth_url,
-                                 token_endpoint=real_auth_url.replace('/authorize', '/token'))
-        if self.is_account_client and self.account_id:
-            prefix = f'{self.host}/oidc/accounts/{self.account_id}'
-            return OidcEndpoints(authorization_endpoint=f'{prefix}/v1/authorize',
-                                 token_endpoint=f'{prefix}/v1/token')
-        oidc = f'{self.host}/oidc/.well-known/oauth-authorization-server'
-        res = requests.get(oidc)
-        if res.status_code != 200:
-            return None
-        auth_metadata = res.json()
-        return OidcEndpoints(authorization_endpoint=auth_metadata.get('authorization_endpoint'),
-                             token_endpoint=auth_metadata.get('token_endpoint'))
-
-    def debug_string(self) -> str:
-        """ Returns log-friendly representation of configured attributes """
-        buf = []
-        attrs_used = []
-        envs_used = []
-        for attr in Config.attributes():
-            if attr.env and os.environ.get(attr.env):
-                envs_used.append(attr.env)
-            value = getattr(self, attr.name)
-            if not value:
-                continue
-            safe = '***' if attr.sensitive else f'{value}'
-            attrs_used.append(f'{attr.name}={safe}')
-        if attrs_used:
-            buf.append(f'Config: {", ".join(attrs_used)}')
-        if envs_used:
-            buf.append(f'Env: {", ".join(envs_used)}')
-        return '. '.join(buf)
-
-    def to_dict(self) -> Dict[str, any]:
-        return self._inner
-
-    @property
-    def sql_http_path(self) -> Optional[str]:
-        """(Experimental) Return HTTP path for SQL Drivers.
-
-        If `cluster_id` or `warehouse_id` are configured, return a valid HTTP Path argument
-        used in construction of JDBC/ODBC DSN string.
-
-        See https://docs.databricks.com/integrations/jdbc-odbc-bi.html
-        """
-        if (not self.cluster_id) and (not self.warehouse_id):
-            return None
-        if self.cluster_id and self.warehouse_id:
-            raise ValueError('cannot have both cluster_id and warehouse_id')
-        headers = self.authenticate()
-        headers['User-Agent'] = f'{self.user_agent} sdk-feature/sql-http-path'
-        if self.cluster_id:
-            response = requests.get(f"{self.host}/api/2.0/preview/scim/v2/Me", headers=headers)
-            # get workspace ID from the response header
-            workspace_id = response.headers.get('x-databricks-org-id')
-            return f'sql/protocolv1/o/{workspace_id}/{self.cluster_id}'
-        if self.warehouse_id:
-            return f'/sql/1.0/warehouses/{self.warehouse_id}'
-
-    @classmethod
-    def attributes(cls) -> Iterable[ConfigAttribute]:
-        """ Returns a list of Databricks SDK configuration metadata """
-        if hasattr(cls, '_attributes'):
-            return cls._attributes
-        if sys.version_info[1] >= 10:
-            import inspect
-            anno = inspect.get_annotations(cls)
-        else:
-            # Python 3.7 compatibility: getting type hints require extra hop, as described in
-            # "Accessing The Annotations Dict Of An Object In Python 3.9 And Older" section of
-            # https://docs.python.org/3/howto/annotations.html
-            anno = cls.__dict__['__annotations__']
-        attrs = []
-        for name, v in cls.__dict__.items():
-            if type(v) != ConfigAttribute:
-                continue
-            v.name = name
-            v.transform = anno.get(name, str)
-            attrs.append(v)
-        cls._attributes = attrs
-        return cls._attributes
-
-    def _fix_host_if_needed(self):
-        if not self.host:
-            return
-        # fix url to remove trailing slash
-        o = urllib.parse.urlparse(self.host)
-        if not o.hostname:
-            # only hostname is specified
-            self.host = f"https://{self.host}"
-        else:
-            self.host = f"{o.scheme}://{o.netloc}"
-
-    def _set_inner_config(self, keyword_args: Dict[str, any]):
-        for attr in self.attributes():
-            if attr.name not in keyword_args:
-                continue
-            if keyword_args.get(attr.name, None) is None:
-                continue
-            self.__setattr__(attr.name, keyword_args[attr.name])
-
-    def _load_from_env(self):
-        found = False
-        for attr in self.attributes():
-            if not attr.env:
-                continue
-            if attr.name in self._inner:
-                continue
-            value = os.environ.get(attr.env)
-            if not value:
-                continue
-            self.__setattr__(attr.name, value)
-            found = True
-        if found:
-            logger.debug('Loaded from environment')
-
-    def _known_file_config_loader(self):
-        if not self.profile and (self.is_any_auth_configured or self.host
-                                 or self.azure_workspace_resource_id):
-            # skip loading configuration file if there's any auth configured
-            # directly as part of the Config() constructor.
-            return
-        config_file = self.config_file
-        if not config_file:
-            config_file = "~/.databrickscfg"
-        config_path = pathlib.Path(config_file).expanduser()
-        if not config_path.exists():
-            logger.debug("%s does not exist", config_path)
-            return
-        ini_file = configparser.ConfigParser()
-        ini_file.read(config_path)
-        profile = self.profile
-        has_explicit_profile = self.profile is not None
-        # In Go SDK, we skip merging the profile with DEFAULT section, though Python's ConfigParser.items()
-        # is returning profile key-value pairs _including those from DEFAULT_. This is not what we expect
-        # from Unified Auth test suite at the moment. Hence, the private variable access.
-        # See: https://docs.python.org/3/library/configparser.html#mapping-protocol-access
-        if not has_explicit_profile and not ini_file.defaults():
-            logger.debug(f'{config_path} has no DEFAULT profile configured')
-            return
-        if not has_explicit_profile:
-            profile = "DEFAULT"
-        profiles = ini_file._sections
-        if ini_file.defaults():
-            profiles['DEFAULT'] = ini_file.defaults()
-        if profile not in profiles:
-            raise ValueError(f'resolve: {config_path} has no {profile} profile configured')
-        raw_config = profiles[profile]
-        logger.info(f'loading {profile} profile from {config_file}: {", ".join(raw_config.keys())}')
-        for k, v in raw_config.items():
-            if k in self._inner:
-                # don't overwrite a value previously set
-                continue
-            self.__setattr__(k, v)
-
-    def _validate(self):
-        auths_used = set()
-        for attr in Config.attributes():
-            if attr.name not in self._inner:
-                continue
-            if not attr.auth:
-                continue
-            auths_used.add(attr.auth)
-        if len(auths_used) <= 1:
-            return
-        if self.auth_type:
-            # client has auth preference set
-            return
-        names = " and ".join(sorted(auths_used))
-        raise ValueError(f'validate: more than one authorization method configured: {names}')
-
-    def _init_auth(self):
-        try:
-            self._header_factory = self._credentials_provider(self)
-            self.auth_type = self._credentials_provider.auth_type()
-            if not self._header_factory:
-                raise ValueError('not configured')
-        except ValueError as e:
-            raise ValueError(f'{self._credentials_provider.auth_type()} auth: {e}') from e
-
-    def __repr__(self):
-        return f'<{self.debug_string()}>'
-
-    def copy(self):
-        """Creates a copy of the config object.
-        All the copies share most of their internal state (ie, shared reference to fields such as credential_provider).
-        Copies have their own instances of the following fields
-            - `_user_agent_other_info`
-        """
-        cpy: Config = copy.copy(self)
-        cpy._user_agent_other_info = copy.deepcopy(self._user_agent_other_info)
-        return cpy
-
 
 class ApiClient:
     _cfg: Config
@@ -1143,7 +123,8 @@ class ApiClient:
         headers = {}
        headers['User-Agent'] = self._user_agent_base
         retryable = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
-                            is_retryable=self._is_retryable)
+                            is_retryable=self._is_retryable,
+                            clock=self._cfg.clock)
         return retryable(self._perform)(method,
                                         path,
                                         query=query,
```