databricks-sdk 0.17.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. The information is provided for informational purposes only.

Potentially problematic release.


This version of databricks-sdk might be problematic. See the file-by-file changes below for details.

Files changed (36)
  1. databricks/sdk/__init__.py +41 -5
  2. databricks/sdk/azure.py +17 -7
  3. databricks/sdk/clock.py +49 -0
  4. databricks/sdk/config.py +459 -0
  5. databricks/sdk/core.py +7 -1026
  6. databricks/sdk/credentials_provider.py +628 -0
  7. databricks/sdk/environments.py +72 -0
  8. databricks/sdk/errors/__init__.py +1 -1
  9. databricks/sdk/errors/mapper.py +5 -5
  10. databricks/sdk/mixins/workspace.py +3 -3
  11. databricks/sdk/oauth.py +2 -1
  12. databricks/sdk/retries.py +9 -5
  13. databricks/sdk/service/_internal.py +1 -1
  14. databricks/sdk/service/catalog.py +946 -82
  15. databricks/sdk/service/compute.py +106 -41
  16. databricks/sdk/service/files.py +145 -31
  17. databricks/sdk/service/iam.py +44 -40
  18. databricks/sdk/service/jobs.py +199 -20
  19. databricks/sdk/service/ml.py +33 -42
  20. databricks/sdk/service/oauth2.py +3 -4
  21. databricks/sdk/service/pipelines.py +51 -31
  22. databricks/sdk/service/serving.py +1 -2
  23. databricks/sdk/service/settings.py +377 -72
  24. databricks/sdk/service/sharing.py +3 -4
  25. databricks/sdk/service/sql.py +27 -19
  26. databricks/sdk/service/vectorsearch.py +13 -17
  27. databricks/sdk/service/workspace.py +20 -11
  28. databricks/sdk/version.py +1 -1
  29. {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/METADATA +4 -4
  30. databricks_sdk-0.19.0.dist-info/RECORD +53 -0
  31. databricks_sdk-0.17.0.dist-info/RECORD +0 -49
  32. /databricks/sdk/errors/{mapping.py → platform.py} +0 -0
  33. {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/LICENSE +0 -0
  34. {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/NOTICE +0 -0
  35. {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/WHEEL +0 -0
  36. {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/top_level.txt +0 -0
databricks/sdk/core.py CHANGED
@@ -1,1042 +1,22 @@
1
- import abc
2
- import base64
3
- import configparser
4
- import copy
5
- import functools
6
- import io
7
- import json
8
- import logging
9
- import os
10
- import pathlib
11
- import platform
12
1
  import re
13
- import subprocess
14
- import sys
15
2
  import urllib.parse
16
- from datetime import datetime, timedelta
3
+ from datetime import timedelta
17
4
  from json import JSONDecodeError
18
5
  from types import TracebackType
19
- from typing import (Any, BinaryIO, Callable, Dict, Iterable, Iterator, List,
20
- Optional, Type, Union)
6
+ from typing import Any, BinaryIO, Iterator, Type
21
7
 
22
- import google.auth
23
- import requests
24
- from google.auth import impersonated_credentials
25
- from google.auth.transport.requests import Request
26
- from google.oauth2 import service_account
27
8
  from requests.adapters import HTTPAdapter
28
9
 
29
- from .azure import (ARM_DATABRICKS_RESOURCE_ID, ENVIRONMENTS, AzureEnvironment,
30
- add_sp_management_token, add_workspace_id_header)
10
+ from .config import *
11
+ # To preserve backwards compatibility (as these definitions were previously in this module)
12
+ from .credentials_provider import *
31
13
  from .errors import DatabricksError, error_mapper
32
- from .oauth import (ClientCredentials, OAuthClient, OidcEndpoints, Refreshable,
33
- Token, TokenCache, TokenSource)
34
14
  from .retries import retried
35
- from .version import __version__
36
15
 
37
16
  __all__ = ['Config', 'DatabricksError']
38
17
 
39
18
  logger = logging.getLogger('databricks.sdk')
40
19
 
41
- HeaderFactory = Callable[[], Dict[str, str]]
42
-
43
- GcpScopes = ["https://www.googleapis.com/auth/cloud-platform", "https://www.googleapis.com/auth/compute"]
44
-
45
-
46
- class CredentialsProvider(abc.ABC):
47
- """ CredentialsProvider is the protocol (call-side interface)
48
- for authenticating requests to Databricks REST APIs"""
49
-
50
- @abc.abstractmethod
51
- def auth_type(self) -> str:
52
- ...
53
-
54
- @abc.abstractmethod
55
- def __call__(self, cfg: 'Config') -> HeaderFactory:
56
- ...
57
-
58
-
59
- def credentials_provider(name: str, require: List[str]):
60
- """ Given the function that receives a Config and returns RequestVisitor,
61
- create CredentialsProvider with a given name and required configuration
62
- attribute names to be present for this function to be called. """
63
-
64
- def inner(func: Callable[['Config'], HeaderFactory]) -> CredentialsProvider:
65
-
66
- @functools.wraps(func)
67
- def wrapper(cfg: 'Config') -> Optional[HeaderFactory]:
68
- for attr in require:
69
- if not getattr(cfg, attr):
70
- return None
71
- return func(cfg)
72
-
73
- wrapper.auth_type = lambda: name
74
- return wrapper
75
-
76
- return inner
77
-
78
-
79
- @credentials_provider('basic', ['host', 'username', 'password'])
80
- def basic_auth(cfg: 'Config') -> HeaderFactory:
81
- """ Given username and password, add base64-encoded Basic credentials """
82
- encoded = base64.b64encode(f'{cfg.username}:{cfg.password}'.encode()).decode()
83
- static_credentials = {'Authorization': f'Basic {encoded}'}
84
-
85
- def inner() -> Dict[str, str]:
86
- return static_credentials
87
-
88
- return inner
89
-
90
-
91
- @credentials_provider('pat', ['host', 'token'])
92
- def pat_auth(cfg: 'Config') -> HeaderFactory:
93
- """ Adds Databricks Personal Access Token to every request """
94
- static_credentials = {'Authorization': f'Bearer {cfg.token}'}
95
-
96
- def inner() -> Dict[str, str]:
97
- return static_credentials
98
-
99
- return inner
100
-
101
-
102
- @credentials_provider('runtime', [])
103
- def runtime_native_auth(cfg: 'Config') -> Optional[HeaderFactory]:
104
- if 'DATABRICKS_RUNTIME_VERSION' not in os.environ:
105
- return None
106
-
107
- # This import MUST be after the "DATABRICKS_RUNTIME_VERSION" check
108
- # above, so that we are not throwing import errors when not in
109
- # runtime and no config variables are set.
110
- from databricks.sdk.runtime import (init_runtime_legacy_auth,
111
- init_runtime_native_auth,
112
- init_runtime_repl_auth)
113
- for init in [init_runtime_native_auth, init_runtime_repl_auth, init_runtime_legacy_auth]:
114
- if init is None:
115
- continue
116
- host, inner = init()
117
- if host is None:
118
- logger.debug(f'[{init.__name__}] no host detected')
119
- continue
120
- cfg.host = host
121
- logger.debug(f'[{init.__name__}] runtime native auth configured')
122
- return inner
123
- return None
124
-
125
-
126
- @credentials_provider('oauth-m2m', ['is_aws', 'host', 'client_id', 'client_secret'])
127
- def oauth_service_principal(cfg: 'Config') -> Optional[HeaderFactory]:
128
- """ Adds refreshed Databricks machine-to-machine OAuth Bearer token to every request,
129
- if /oidc/.well-known/oauth-authorization-server is available on the given host. """
130
- # TODO: Azure returns 404 for UC workspace after redirecting to
131
- # https://login.microsoftonline.com/{cfg.azure_tenant_id}/.well-known/oauth-authorization-server
132
- oidc = cfg.oidc_endpoints
133
- if oidc is None:
134
- return None
135
- token_source = ClientCredentials(client_id=cfg.client_id,
136
- client_secret=cfg.client_secret,
137
- token_url=oidc.token_endpoint,
138
- scopes=["all-apis"],
139
- use_header=True)
140
-
141
- def inner() -> Dict[str, str]:
142
- token = token_source.token()
143
- return {'Authorization': f'{token.token_type} {token.access_token}'}
144
-
145
- return inner
146
-
147
-
148
- @credentials_provider('external-browser', ['host', 'auth_type'])
149
- def external_browser(cfg: 'Config') -> Optional[HeaderFactory]:
150
- if cfg.auth_type != 'external-browser':
151
- return None
152
- if cfg.client_id:
153
- client_id = cfg.client_id
154
- elif cfg.is_aws:
155
- client_id = 'databricks-cli'
156
- elif cfg.is_azure:
157
- # Use Azure AD app for cases when Azure CLI is not available on the machine.
158
- # App has to be registered as Single-page multi-tenant to support PKCE
159
- # TODO: temporary app ID, change it later.
160
- client_id = '6128a518-99a9-425b-8333-4cc94f04cacd'
161
- else:
162
- raise ValueError(f'local browser SSO is not supported')
163
- oauth_client = OAuthClient(host=cfg.host,
164
- client_id=client_id,
165
- redirect_url='http://localhost:8020',
166
- client_secret=cfg.client_secret)
167
-
168
- # Load cached credentials from disk if they exist.
169
- # Note that these are local to the Python SDK and not reused by other SDKs.
170
- token_cache = TokenCache(oauth_client)
171
- credentials = token_cache.load()
172
- if credentials:
173
- # Force a refresh in case the loaded credentials are expired.
174
- credentials.token()
175
- else:
176
- consent = oauth_client.initiate_consent()
177
- if not consent:
178
- return None
179
- credentials = consent.launch_external_browser()
180
- token_cache.save(credentials)
181
- return credentials(cfg)
182
-
183
-
184
- def _ensure_host_present(cfg: 'Config', token_source_for: Callable[[str], TokenSource]):
185
- """ Resolves Azure Databricks workspace URL from ARM Resource ID """
186
- if cfg.host:
187
- return
188
- if not cfg.azure_workspace_resource_id:
189
- return
190
- arm = cfg.arm_environment.resource_manager_endpoint
191
- token = token_source_for(arm).token()
192
- resp = requests.get(f"{arm}{cfg.azure_workspace_resource_id}?api-version=2018-04-01",
193
- headers={"Authorization": f"Bearer {token.access_token}"})
194
- if not resp.ok:
195
- raise ValueError(f"Cannot resolve Azure Databricks workspace: {resp.content}")
196
- cfg.host = f"https://{resp.json()['properties']['workspaceUrl']}"
197
-
198
-
199
- @credentials_provider('azure-client-secret',
200
- ['is_azure', 'azure_client_id', 'azure_client_secret', 'azure_tenant_id'])
201
- def azure_service_principal(cfg: 'Config') -> HeaderFactory:
202
- """ Adds refreshed Azure Active Directory (AAD) Service Principal OAuth tokens
203
- to every request, while automatically resolving different Azure environment endpoints. """
204
-
205
- def token_source_for(resource: str) -> TokenSource:
206
- aad_endpoint = cfg.arm_environment.active_directory_endpoint
207
- return ClientCredentials(client_id=cfg.azure_client_id,
208
- client_secret=cfg.azure_client_secret,
209
- token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
210
- endpoint_params={"resource": resource},
211
- use_params=True)
212
-
213
- _ensure_host_present(cfg, token_source_for)
214
- logger.info("Configured AAD token for Service Principal (%s)", cfg.azure_client_id)
215
- inner = token_source_for(cfg.effective_azure_login_app_id)
216
- cloud = token_source_for(cfg.arm_environment.service_management_endpoint)
217
-
218
- def refreshed_headers() -> Dict[str, str]:
219
- headers = {'Authorization': f"Bearer {inner.token().access_token}", }
220
- add_workspace_id_header(cfg, headers)
221
- add_sp_management_token(cloud, headers)
222
- return headers
223
-
224
- return refreshed_headers
225
-
226
-
227
- @credentials_provider('github-oidc-azure', ['host', 'azure_client_id'])
228
- def github_oidc_azure(cfg: 'Config') -> Optional[HeaderFactory]:
229
- if 'ACTIONS_ID_TOKEN_REQUEST_TOKEN' not in os.environ:
230
- # not in GitHub actions
231
- return None
232
-
233
- # Client ID is the minimal thing we need, as otherwise we get AADSTS700016: Application with
234
- # identifier 'https://token.actions.githubusercontent.com' was not found in the directory '...'.
235
- if not cfg.is_azure:
236
- return None
237
-
238
- # See https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-cloud-providers
239
- headers = {'Authorization': f"Bearer {os.environ['ACTIONS_ID_TOKEN_REQUEST_TOKEN']}"}
240
- endpoint = f"{os.environ['ACTIONS_ID_TOKEN_REQUEST_URL']}&audience=api://AzureADTokenExchange"
241
- response = requests.get(endpoint, headers=headers)
242
- if not response.ok:
243
- return None
244
-
245
- # get the ID Token with aud=api://AzureADTokenExchange sub=repo:org/repo:environment:name
246
- response_json = response.json()
247
- if 'value' not in response_json:
248
- return None
249
-
250
- logger.info("Configured AAD token for GitHub Actions OIDC (%s)", cfg.azure_client_id)
251
- params = {
252
- 'client_assertion_type': 'urn:ietf:params:oauth:client-assertion-type:jwt-bearer',
253
- 'resource': cfg.effective_azure_login_app_id,
254
- 'client_assertion': response_json['value'],
255
- }
256
- aad_endpoint = cfg.arm_environment.active_directory_endpoint
257
- if not cfg.azure_tenant_id:
258
- # detect Azure AD Tenant ID if it's not specified directly
259
- token_endpoint = cfg.oidc_endpoints.token_endpoint
260
- cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0]
261
- inner = ClientCredentials(client_id=cfg.azure_client_id,
262
- client_secret="", # we have no (rotatable) secrets in OIDC flow
263
- token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
264
- endpoint_params=params,
265
- use_params=True)
266
-
267
- def refreshed_headers() -> Dict[str, str]:
268
- token = inner.token()
269
- return {'Authorization': f'{token.token_type} {token.access_token}'}
270
-
271
- return refreshed_headers
272
-
273
-
274
- @credentials_provider('google-credentials', ['host', 'google_credentials'])
275
- def google_credentials(cfg: 'Config') -> Optional[HeaderFactory]:
276
- if not cfg.is_gcp:
277
- return None
278
- # Reads credentials as JSON. Credentials can be either a path to JSON file, or actual JSON string.
279
- # Obtain the id token by providing the json file path and target audience.
280
- if (os.path.isfile(cfg.google_credentials)):
281
- with io.open(cfg.google_credentials, "r", encoding="utf-8") as json_file:
282
- account_info = json.load(json_file)
283
- else:
284
- # If the file doesn't exist, assume that the config is the actual JSON content.
285
- account_info = json.loads(cfg.google_credentials)
286
-
287
- credentials = service_account.IDTokenCredentials.from_service_account_info(info=account_info,
288
- target_audience=cfg.host)
289
-
290
- request = Request()
291
-
292
- gcp_credentials = service_account.Credentials.from_service_account_info(info=account_info,
293
- scopes=GcpScopes)
294
-
295
- def refreshed_headers() -> Dict[str, str]:
296
- credentials.refresh(request)
297
- headers = {'Authorization': f'Bearer {credentials.token}'}
298
- if cfg.is_account_client:
299
- gcp_credentials.refresh(request)
300
- headers["X-Databricks-GCP-SA-Access-Token"] = gcp_credentials.token
301
- return headers
302
-
303
- return refreshed_headers
304
-
305
-
306
- @credentials_provider('google-id', ['host', 'google_service_account'])
307
- def google_id(cfg: 'Config') -> Optional[HeaderFactory]:
308
- if not cfg.is_gcp:
309
- return None
310
- credentials, _project_id = google.auth.default()
311
-
312
- # Create the impersonated credential.
313
- target_credentials = impersonated_credentials.Credentials(source_credentials=credentials,
314
- target_principal=cfg.google_service_account,
315
- target_scopes=[])
316
-
317
- # Set the impersonated credential, target audience and token options.
318
- id_creds = impersonated_credentials.IDTokenCredentials(target_credentials,
319
- target_audience=cfg.host,
320
- include_email=True)
321
-
322
- gcp_impersonated_credentials = impersonated_credentials.Credentials(
323
- source_credentials=credentials, target_principal=cfg.google_service_account, target_scopes=GcpScopes)
324
-
325
- request = Request()
326
-
327
- def refreshed_headers() -> Dict[str, str]:
328
- id_creds.refresh(request)
329
- headers = {'Authorization': f'Bearer {id_creds.token}'}
330
- if cfg.is_account_client:
331
- gcp_impersonated_credentials.refresh(request)
332
- headers["X-Databricks-GCP-SA-Access-Token"] = gcp_impersonated_credentials.token
333
- return headers
334
-
335
- return refreshed_headers
336
-
337
-
338
- class CliTokenSource(Refreshable):
339
-
340
- def __init__(self, cmd: List[str], token_type_field: str, access_token_field: str, expiry_field: str):
341
- super().__init__()
342
- self._cmd = cmd
343
- self._token_type_field = token_type_field
344
- self._access_token_field = access_token_field
345
- self._expiry_field = expiry_field
346
-
347
- @staticmethod
348
- def _parse_expiry(expiry: str) -> datetime:
349
- expiry = expiry.rstrip("Z").split(".")[0]
350
- for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"):
351
- try:
352
- return datetime.strptime(expiry, fmt)
353
- except ValueError as e:
354
- last_e = e
355
- if last_e:
356
- raise last_e
357
-
358
- def refresh(self) -> Token:
359
- try:
360
- is_windows = sys.platform.startswith('win')
361
- # windows requires shell=True to be able to execute 'az login' or other commands
362
- # cannot use shell=True all the time, as it breaks macOS
363
- out = subprocess.run(self._cmd, capture_output=True, check=True, shell=is_windows)
364
- it = json.loads(out.stdout.decode())
365
- expires_on = self._parse_expiry(it[self._expiry_field])
366
- return Token(access_token=it[self._access_token_field],
367
- token_type=it[self._token_type_field],
368
- expiry=expires_on)
369
- except ValueError as e:
370
- raise ValueError(f"cannot unmarshal CLI result: {e}")
371
- except subprocess.CalledProcessError as e:
372
- stdout = e.stdout.decode().strip()
373
- stderr = e.stderr.decode().strip()
374
- message = stdout or stderr
375
- raise IOError(f'cannot get access token: {message}') from e
376
-
377
-
378
- class AzureCliTokenSource(CliTokenSource):
379
- """ Obtain the token granted by `az login` CLI command """
380
-
381
- def __init__(self, resource: str, subscription: str = ""):
382
- cmd = ["az", "account", "get-access-token", "--resource", resource, "--output", "json"]
383
- if subscription != "":
384
- cmd.append("--subscription")
385
- cmd.append(subscription)
386
- super().__init__(cmd=cmd,
387
- token_type_field='tokenType',
388
- access_token_field='accessToken',
389
- expiry_field='expiresOn')
390
-
391
- def is_human_user(self) -> bool:
392
- """The UPN claim is the username of the user, but not the Service Principal.
393
-
394
- Azure CLI can be authenticated by both human users (`az login`) and service principals. In case of service
395
- principals, it can be either OIDC from GitHub or login with a password:
396
-
397
- ~ $ az login --service-principal --user $clientID --password $clientSecret --tenant $tenantID
398
-
399
- Human users get more claims:
400
- - 'amr' - how the subject of the token was authenticated
401
- - 'name', 'family_name', 'given_name' - human-readable values that identifies the subject of the token
402
- - 'scp' with `user_impersonation` value, that shows the set of scopes exposed by your application for which
403
- the client application has requested (and received) consent
404
- - 'unique_name' - a human-readable value that identifies the subject of the token. This value is not
405
- guaranteed to be unique within a tenant and should be used only for display purposes.
406
- - 'upn' - The username of the user.
407
- """
408
- return 'upn' in self.token().jwt_claims()
409
-
410
- @staticmethod
411
- def for_resource(cfg: 'Config', resource: str) -> 'AzureCliTokenSource':
412
- subscription = AzureCliTokenSource.get_subscription(cfg)
413
- if subscription != "":
414
- token_source = AzureCliTokenSource(resource, subscription)
415
- try:
416
- # This will fail if the user has access to the workspace, but not to the subscription
417
- # itself.
418
- # In such case, we fall back to not using the subscription.
419
- token_source.token()
420
- return token_source
421
- except OSError:
422
- logger.warning("Failed to get token for subscription. Using resource only token.")
423
-
424
- token_source = AzureCliTokenSource(resource)
425
- token_source.token()
426
- return token_source
427
-
428
- @staticmethod
429
- def get_subscription(cfg: 'Config') -> str:
430
- resource = cfg.azure_workspace_resource_id
431
- if resource is None or resource == "":
432
- return ""
433
- components = resource.split('/')
434
- if len(components) < 3:
435
- logger.warning("Invalid azure workspace resource ID")
436
- return ""
437
- return components[2]
438
-
439
-
440
- @credentials_provider('azure-cli', ['is_azure'])
441
- def azure_cli(cfg: 'Config') -> Optional[HeaderFactory]:
442
- """ Adds refreshed OAuth token granted by `az login` command to every request. """
443
- token_source = None
444
- mgmt_token_source = None
445
- try:
446
- token_source = AzureCliTokenSource.for_resource(cfg, cfg.effective_azure_login_app_id)
447
- except FileNotFoundError:
448
- doc = 'https://docs.microsoft.com/en-us/cli/azure/?view=azure-cli-latest'
449
- logger.debug(f'Most likely Azure CLI is not installed. See {doc} for details')
450
- return None
451
- if not token_source.is_human_user():
452
- try:
453
- management_endpoint = cfg.arm_environment.service_management_endpoint
454
- mgmt_token_source = AzureCliTokenSource.for_resource(cfg, management_endpoint)
455
- except Exception as e:
456
- logger.debug(f'Not including service management token in headers', exc_info=e)
457
- mgmt_token_source = None
458
-
459
- _ensure_host_present(cfg, lambda resource: AzureCliTokenSource.for_resource(cfg, resource))
460
- logger.info("Using Azure CLI authentication with AAD tokens")
461
- if not cfg.is_account_client and AzureCliTokenSource.get_subscription(cfg) == "":
462
- logger.warning(
463
- "azure_workspace_resource_id field not provided. "
464
- "It is recommended to specify this field in the Databricks configuration to avoid authentication errors."
465
- )
466
-
467
- def inner() -> Dict[str, str]:
468
- token = token_source.token()
469
- headers = {'Authorization': f'{token.token_type} {token.access_token}'}
470
- add_workspace_id_header(cfg, headers)
471
- if mgmt_token_source:
472
- add_sp_management_token(mgmt_token_source, headers)
473
- return headers
474
-
475
- return inner
476
-
477
-
478
- class DatabricksCliTokenSource(CliTokenSource):
479
- """ Obtain the token granted by `databricks auth login` CLI command """
480
-
481
- def __init__(self, cfg: 'Config'):
482
- args = ['auth', 'token', '--host', cfg.host]
483
- if cfg.is_account_client:
484
- args += ['--account-id', cfg.account_id]
485
-
486
- cli_path = cfg.databricks_cli_path
487
- if not cli_path:
488
- cli_path = 'databricks'
489
-
490
- # If the path is unqualified, look it up in PATH.
491
- if cli_path.count("/") == 0:
492
- cli_path = self.__class__._find_executable(cli_path)
493
-
494
- super().__init__(cmd=[cli_path, *args],
495
- token_type_field='token_type',
496
- access_token_field='access_token',
497
- expiry_field='expiry')
498
-
499
- @staticmethod
500
- def _find_executable(name) -> str:
501
- err = FileNotFoundError("Most likely the Databricks CLI is not installed")
502
- for dir in os.getenv("PATH", default="").split(os.path.pathsep):
503
- path = pathlib.Path(dir).joinpath(name).resolve()
504
- if not path.is_file():
505
- continue
506
-
507
- # The new Databricks CLI is a single binary with size > 1MB.
508
- # We use the size as a signal to determine which Databricks CLI is installed.
509
- stat = path.stat()
510
- if stat.st_size < (1024 * 1024):
511
- err = FileNotFoundError("Databricks CLI version <0.100.0 detected")
512
- continue
513
-
514
- return str(path)
515
-
516
- raise err
517
-
518
-
519
- @credentials_provider('databricks-cli', ['host', 'is_aws'])
520
- def databricks_cli(cfg: 'Config') -> Optional[HeaderFactory]:
521
- try:
522
- token_source = DatabricksCliTokenSource(cfg)
523
- except FileNotFoundError as e:
524
- logger.debug(e)
525
- return None
526
-
527
- try:
528
- token_source.token()
529
- except IOError as e:
530
- if 'databricks OAuth is not' in str(e):
531
- logger.debug(f'OAuth not configured or not available: {e}')
532
- return None
533
- raise e
534
-
535
- logger.info("Using Databricks CLI authentication")
536
-
537
- def inner() -> Dict[str, str]:
538
- token = token_source.token()
539
- return {'Authorization': f'{token.token_type} {token.access_token}'}
540
-
541
- return inner
542
-
543
-
544
- class MetadataServiceTokenSource(Refreshable):
545
- """ Obtain the token granted by Databricks Metadata Service """
546
- METADATA_SERVICE_VERSION = "1"
547
- METADATA_SERVICE_VERSION_HEADER = "X-Databricks-Metadata-Version"
548
- METADATA_SERVICE_HOST_HEADER = "X-Databricks-Host"
549
- _metadata_service_timeout = 10 # seconds
550
-
551
- def __init__(self, cfg: 'Config'):
552
- super().__init__()
553
- self.url = cfg.metadata_service_url
554
- self.host = cfg.host
555
-
556
- def refresh(self) -> Token:
557
- resp = requests.get(self.url,
558
- timeout=self._metadata_service_timeout,
559
- headers={
560
- self.METADATA_SERVICE_VERSION_HEADER: self.METADATA_SERVICE_VERSION,
561
- self.METADATA_SERVICE_HOST_HEADER: self.host
562
- })
563
- json_resp: dict[str, Union[str, float]] = resp.json()
564
- access_token = json_resp.get("access_token", None)
565
- if access_token is None:
566
- raise ValueError("Metadata Service returned empty token")
567
- token_type = json_resp.get("token_type", None)
568
- if token_type is None:
569
- raise ValueError("Metadata Service returned empty token type")
570
- if json_resp["expires_on"] in ["", None]:
571
- raise ValueError("Metadata Service returned invalid expiry")
572
- try:
573
- expiry = datetime.fromtimestamp(json_resp["expires_on"])
574
- except:
575
- raise ValueError("Metadata Service returned invalid expiry")
576
-
577
- return Token(access_token=access_token, token_type=token_type, expiry=expiry)
578
-
579
-
580
- @credentials_provider('metadata-service', ['host', 'metadata_service_url'])
581
- def metadata_service(cfg: 'Config') -> Optional[HeaderFactory]:
582
- """ Adds refreshed token granted by Databricks Metadata Service to every request. """
583
-
584
- token_source = MetadataServiceTokenSource(cfg)
585
- token_source.token()
586
- logger.info("Using Databricks Metadata Service authentication")
587
-
588
- def inner() -> Dict[str, str]:
589
- token = token_source.token()
590
- return {'Authorization': f'{token.token_type} {token.access_token}'}
591
-
592
- return inner
593
-
594
-
595
- class DefaultCredentials:
596
- """ Select the first applicable credential provider from the chain """
597
-
598
- def __init__(self) -> None:
599
- self._auth_type = 'default'
600
-
601
- def auth_type(self) -> str:
602
- return self._auth_type
603
-
604
- def __call__(self, cfg: 'Config') -> HeaderFactory:
605
- auth_providers = [
606
- pat_auth, basic_auth, metadata_service, oauth_service_principal, azure_service_principal,
607
- github_oidc_azure, azure_cli, external_browser, databricks_cli, runtime_native_auth,
608
- google_credentials, google_id
609
- ]
610
- for provider in auth_providers:
611
- auth_type = provider.auth_type()
612
- if cfg.auth_type and auth_type != cfg.auth_type:
613
- # ignore other auth types if one is explicitly enforced
614
- logger.debug(f"Ignoring {auth_type} auth, because {cfg.auth_type} is preferred")
615
- continue
616
- logger.debug(f'Attempting to configure auth: {auth_type}')
617
- try:
618
- header_factory = provider(cfg)
619
- if not header_factory:
620
- continue
621
- self._auth_type = auth_type
622
- return header_factory
623
- except Exception as e:
624
- raise ValueError(f'{auth_type}: {e}') from e
625
- auth_flow_url = "https://docs.databricks.com/en/dev-tools/auth.html#databricks-client-unified-authentication"
626
- raise ValueError(
627
- f'cannot configure default credentials, please check {auth_flow_url} to configure credentials for your preferred authentication method.'
628
- )
629
-
630
-
631
- class ConfigAttribute:
632
- """ Configuration attribute metadata and descriptor protocols. """
633
-
634
- # name and transform are discovered from Config.__new__
635
- name: str = None
636
- transform: type = str
637
-
638
- def __init__(self, env: str = None, auth: str = None, sensitive: bool = False):
639
- self.env = env
640
- self.auth = auth
641
- self.sensitive = sensitive
642
-
643
- def __get__(self, cfg: 'Config', owner):
644
- if not cfg:
645
- return None
646
- return cfg._inner.get(self.name, None)
647
-
648
- def __set__(self, cfg: 'Config', value: any):
649
- cfg._inner[self.name] = self.transform(value)
650
-
651
- def __repr__(self) -> str:
652
- return f"<ConfigAttribute '{self.name}' {self.transform.__name__}>"
653
-
654
-
655
- class Config:
656
- host: str = ConfigAttribute(env='DATABRICKS_HOST')
657
- account_id: str = ConfigAttribute(env='DATABRICKS_ACCOUNT_ID')
658
- token: str = ConfigAttribute(env='DATABRICKS_TOKEN', auth='pat', sensitive=True)
659
- username: str = ConfigAttribute(env='DATABRICKS_USERNAME', auth='basic')
660
- password: str = ConfigAttribute(env='DATABRICKS_PASSWORD', auth='basic', sensitive=True)
661
- client_id: str = ConfigAttribute(env='DATABRICKS_CLIENT_ID', auth='oauth')
662
- client_secret: str = ConfigAttribute(env='DATABRICKS_CLIENT_SECRET', auth='oauth', sensitive=True)
663
- profile: str = ConfigAttribute(env='DATABRICKS_CONFIG_PROFILE')
664
- config_file: str = ConfigAttribute(env='DATABRICKS_CONFIG_FILE')
665
- google_service_account: str = ConfigAttribute(env='DATABRICKS_GOOGLE_SERVICE_ACCOUNT', auth='google')
666
- google_credentials: str = ConfigAttribute(env='GOOGLE_CREDENTIALS', auth='google', sensitive=True)
667
- azure_workspace_resource_id: str = ConfigAttribute(env='DATABRICKS_AZURE_RESOURCE_ID', auth='azure')
668
- azure_use_msi: bool = ConfigAttribute(env='ARM_USE_MSI', auth='azure')
669
- azure_client_secret: str = ConfigAttribute(env='ARM_CLIENT_SECRET', auth='azure', sensitive=True)
670
- azure_client_id: str = ConfigAttribute(env='ARM_CLIENT_ID', auth='azure')
671
- azure_tenant_id: str = ConfigAttribute(env='ARM_TENANT_ID', auth='azure')
672
- azure_environment: str = ConfigAttribute(env='ARM_ENVIRONMENT')
673
- azure_login_app_id: str = ConfigAttribute(env='DATABRICKS_AZURE_LOGIN_APP_ID', auth='azure')
674
- databricks_cli_path: str = ConfigAttribute(env='DATABRICKS_CLI_PATH')
675
- auth_type: str = ConfigAttribute(env='DATABRICKS_AUTH_TYPE')
676
- cluster_id: str = ConfigAttribute(env='DATABRICKS_CLUSTER_ID')
677
- warehouse_id: str = ConfigAttribute(env='DATABRICKS_WAREHOUSE_ID')
678
- skip_verify: bool = ConfigAttribute()
679
- http_timeout_seconds: float = ConfigAttribute()
680
- debug_truncate_bytes: int = ConfigAttribute(env='DATABRICKS_DEBUG_TRUNCATE_BYTES')
681
- debug_headers: bool = ConfigAttribute(env='DATABRICKS_DEBUG_HEADERS')
682
- rate_limit: int = ConfigAttribute(env='DATABRICKS_RATE_LIMIT')
683
- retry_timeout_seconds: int = ConfigAttribute()
684
- metadata_service_url = ConfigAttribute(env='DATABRICKS_METADATA_SERVICE_URL',
685
- auth='metadata-service',
686
- sensitive=True)
687
- max_connection_pools: int = ConfigAttribute()
688
- max_connections_per_pool: int = ConfigAttribute()
689
-
690
- def __init__(self,
691
- *,
692
- credentials_provider: CredentialsProvider = None,
693
- product="unknown",
694
- product_version="0.0.0",
695
- **kwargs):
696
- self._inner = {}
697
- self._user_agent_other_info = []
698
- self._credentials_provider = credentials_provider if credentials_provider else DefaultCredentials()
699
- try:
700
- self._set_inner_config(kwargs)
701
- self._load_from_env()
702
- self._known_file_config_loader()
703
- self._fix_host_if_needed()
704
- self._validate()
705
- self._init_auth()
706
- self._product = product
707
- self._product_version = product_version
708
- except ValueError as e:
709
- message = self.wrap_debug_info(str(e))
710
- raise ValueError(message) from e
711
-
712
- def wrap_debug_info(self, message: str) -> str:
713
- debug_string = self.debug_string()
714
- if debug_string:
715
- message = f'{message.rstrip(".")}. {debug_string}'
716
- return message
717
-
718
- @staticmethod
719
- def parse_dsn(dsn: str) -> 'Config':
720
- uri = urllib.parse.urlparse(dsn)
721
- if uri.scheme != 'databricks':
722
- raise ValueError(f'Expected databricks:// scheme, got {uri.scheme}://')
723
- kwargs = {'host': f'https://{uri.hostname}'}
724
- if uri.username:
725
- kwargs['username'] = uri.username
726
- if uri.password:
727
- kwargs['password'] = uri.password
728
- query = dict(urllib.parse.parse_qsl(uri.query))
729
- for attr in Config.attributes():
730
- if attr.name not in query:
731
- continue
732
- kwargs[attr.name] = query[attr.name]
733
- return Config(**kwargs)
734
-
735
- def authenticate(self) -> Dict[str, str]:
736
- """ Returns a list of fresh authentication headers """
737
- return self._header_factory()
738
-
739
    def as_dict(self) -> dict:
        """Return the internal mapping of explicitly-set attributes (not a copy)."""
        return self._inner
- @property
743
- def is_azure(self) -> bool:
744
- has_resource_id = self.azure_workspace_resource_id is not None
745
- has_host = self.host is not None
746
- is_public_cloud = has_host and ".azuredatabricks.net" in self.host
747
- is_china_cloud = has_host and ".databricks.azure.cn" in self.host
748
- is_gov_cloud = has_host and ".databricks.azure.us" in self.host
749
- is_valid_cloud = is_public_cloud or is_china_cloud or is_gov_cloud
750
- return has_resource_id or (has_host and is_valid_cloud)
751
-
752
- @property
753
- def is_gcp(self) -> bool:
754
- return self.host and ".gcp.databricks.com" in self.host
755
-
756
- @property
757
- def is_aws(self) -> bool:
758
- return not self.is_azure and not self.is_gcp
759
-
760
- @property
761
- def is_account_client(self) -> bool:
762
- if not self.host:
763
- return False
764
- return self.host.startswith("https://accounts.") or self.host.startswith("https://accounts-dod.")
765
-
766
- @property
767
- def arm_environment(self) -> AzureEnvironment:
768
- env = self.azure_environment if self.azure_environment else "PUBLIC"
769
- try:
770
- return ENVIRONMENTS[env]
771
- except KeyError:
772
- raise ValueError(f"Cannot find Azure {env} Environment")
773
-
774
- @property
775
- def effective_azure_login_app_id(self):
776
- app_id = self.azure_login_app_id
777
- if app_id:
778
- return app_id
779
- return ARM_DATABRICKS_RESOURCE_ID
780
-
781
- @property
782
- def hostname(self) -> str:
783
- url = urllib.parse.urlparse(self.host)
784
- return url.netloc
785
-
786
- @property
787
- def is_any_auth_configured(self) -> bool:
788
- for attr in Config.attributes():
789
- if not attr.auth:
790
- continue
791
- value = self._inner.get(attr.name, None)
792
- if value:
793
- return True
794
- return False
795
-
796
- @property
797
- def user_agent(self):
798
- """ Returns User-Agent header used by this SDK """
799
- py_version = platform.python_version()
800
- os_name = platform.uname().system.lower()
801
-
802
- ua = [
803
- f"{self._product}/{self._product_version}", f"databricks-sdk-py/{__version__}",
804
- f"python/{py_version}", f"os/{os_name}", f"auth/{self.auth_type}",
805
- ]
806
- if len(self._user_agent_other_info) > 0:
807
- ua.append(' '.join(self._user_agent_other_info))
808
- if len(self._upstream_user_agent) > 0:
809
- ua.append(self._upstream_user_agent)
810
- if 'DATABRICKS_RUNTIME_VERSION' in os.environ:
811
- runtime_version = os.environ['DATABRICKS_RUNTIME_VERSION']
812
- if runtime_version != '':
813
- runtime_version = self._sanitize_header_value(runtime_version)
814
- ua.append(f'runtime/{runtime_version}')
815
-
816
- return ' '.join(ua)
817
-
818
- @staticmethod
819
- def _sanitize_header_value(value: str) -> str:
820
- value = value.replace(' ', '-')
821
- value = value.replace('/', '-')
822
- return value
823
-
824
- @property
825
- def _upstream_user_agent(self) -> str:
826
- product = os.environ.get('DATABRICKS_SDK_UPSTREAM', None)
827
- product_version = os.environ.get('DATABRICKS_SDK_UPSTREAM_VERSION', None)
828
- if product is not None and product_version is not None:
829
- return f"upstream/{product} upstream-version/{product_version}"
830
- return ""
831
-
832
- def with_user_agent_extra(self, key: str, value: str) -> 'Config':
833
- self._user_agent_other_info.append(f"{key}/{value}")
834
- return self
835
-
836
    @property
    def oidc_endpoints(self) -> Optional[OidcEndpoints]:
        """Discover the OAuth authorization and token endpoints for this host.

        Returns None when no host is configured or discovery fails. Performs
        blocking HTTP requests (Azure redirect probe or well-known metadata fetch).
        """
        self._fix_host_if_needed()
        if not self.host:
            return None
        if self.is_azure:
            # Retrieve authorize endpoint to retrieve token endpoint after
            res = requests.get(f'{self.host}/oidc/oauth2/v2.0/authorize', allow_redirects=False)
            real_auth_url = res.headers.get('location')
            if not real_auth_url:
                return None
            # token endpoint lives next to the authorize endpoint on Azure
            return OidcEndpoints(authorization_endpoint=real_auth_url,
                                 token_endpoint=real_auth_url.replace('/authorize', '/token'))
        if self.is_account_client and self.account_id:
            # account-level endpoints are deterministic; no discovery request needed
            prefix = f'{self.host}/oidc/accounts/{self.account_id}'
            return OidcEndpoints(authorization_endpoint=f'{prefix}/v1/authorize',
                                 token_endpoint=f'{prefix}/v1/token')
        # generic workspaces publish RFC 8414 authorization-server metadata
        oidc = f'{self.host}/oidc/.well-known/oauth-authorization-server'
        res = requests.get(oidc)
        if res.status_code != 200:
            return None
        auth_metadata = res.json()
        return OidcEndpoints(authorization_endpoint=auth_metadata.get('authorization_endpoint'),
                             token_endpoint=auth_metadata.get('token_endpoint'))
- def debug_string(self) -> str:
862
- """ Returns log-friendly representation of configured attributes """
863
- buf = []
864
- attrs_used = []
865
- envs_used = []
866
- for attr in Config.attributes():
867
- if attr.env and os.environ.get(attr.env):
868
- envs_used.append(attr.env)
869
- value = getattr(self, attr.name)
870
- if not value:
871
- continue
872
- safe = '***' if attr.sensitive else f'{value}'
873
- attrs_used.append(f'{attr.name}={safe}')
874
- if attrs_used:
875
- buf.append(f'Config: {", ".join(attrs_used)}')
876
- if envs_used:
877
- buf.append(f'Env: {", ".join(envs_used)}')
878
- return '. '.join(buf)
879
-
880
    def to_dict(self) -> Dict[str, any]:
        """Return the internal mapping of explicitly-set attributes (same object as as_dict())."""
        return self._inner
    @property
    def sql_http_path(self) -> Optional[str]:
        """(Experimental) Return HTTP path for SQL Drivers.

        If `cluster_id` or `warehouse_id` are configured, return a valid HTTP Path argument
        used in construction of JDBC/ODBC DSN string.

        See https://docs.databricks.com/integrations/jdbc-odbc-bi.html

        :raises ValueError: when both cluster_id and warehouse_id are set.
        """
        if (not self.cluster_id) and (not self.warehouse_id):
            return None
        if self.cluster_id and self.warehouse_id:
            raise ValueError('cannot have both cluster_id and warehouse_id')
        headers = self.authenticate()
        headers['User-Agent'] = f'{self.user_agent} sdk-feature/sql-http-path'
        if self.cluster_id:
            # cluster paths require the workspace (org) ID, which is only
            # exposed as a response header of an authenticated API call
            response = requests.get(f"{self.host}/api/2.0/preview/scim/v2/Me", headers=headers)
            # get workspace ID from the response header
            workspace_id = response.headers.get('x-databricks-org-id')
            return f'sql/protocolv1/o/{workspace_id}/{self.cluster_id}'
        if self.warehouse_id:
            return f'/sql/1.0/warehouses/{self.warehouse_id}'
    @classmethod
    def attributes(cls) -> Iterable[ConfigAttribute]:
        """ Returns a list of Databricks SDK configuration metadata """
        if hasattr(cls, '_attributes'):
            # computed once per class and memoized on the class object
            return cls._attributes
        if sys.version_info[1] >= 10:
            import inspect
            anno = inspect.get_annotations(cls)
        else:
            # Python 3.7 compatibility: getting type hints require extra hop, as described in
            # "Accessing The Annotations Dict Of An Object In Python 3.9 And Older" section of
            # https://docs.python.org/3/howto/annotations.html
            anno = cls.__dict__['__annotations__']
        attrs = []
        for name, v in cls.__dict__.items():
            if type(v) != ConfigAttribute:
                continue
            # backfill metadata the descriptor cannot know about itself
            v.name = name
            # attributes without a type annotation default to str conversion
            v.transform = anno.get(name, str)
            attrs.append(v)
        cls._attributes = attrs
        return cls._attributes
- def _fix_host_if_needed(self):
930
- if not self.host:
931
- return
932
- # fix url to remove trailing slash
933
- o = urllib.parse.urlparse(self.host)
934
- if not o.hostname:
935
- # only hostname is specified
936
- self.host = f"https://{self.host}"
937
- else:
938
- self.host = f"{o.scheme}://{o.netloc}"
939
-
940
- def _set_inner_config(self, keyword_args: Dict[str, any]):
941
- for attr in self.attributes():
942
- if attr.name not in keyword_args:
943
- continue
944
- if keyword_args.get(attr.name, None) is None:
945
- continue
946
- self.__setattr__(attr.name, keyword_args[attr.name])
947
-
948
- def _load_from_env(self):
949
- found = False
950
- for attr in self.attributes():
951
- if not attr.env:
952
- continue
953
- if attr.name in self._inner:
954
- continue
955
- value = os.environ.get(attr.env)
956
- if not value:
957
- continue
958
- self.__setattr__(attr.name, value)
959
- found = True
960
- if found:
961
- logger.debug('Loaded from environment')
962
-
963
    def _known_file_config_loader(self):
        """Merge attributes from the configuration file profile.

        Reads `self.config_file` (defaulting to ~/.databrickscfg) and applies
        key-value pairs from the selected profile. Skipped entirely when auth
        or a host was already configured directly, unless an explicit profile
        was requested. Values already present in self._inner are never
        overwritten.

        :raises ValueError: when an explicitly requested profile is missing.
        """
        if not self.profile and (self.is_any_auth_configured or self.host
                                 or self.azure_workspace_resource_id):
            # skip loading configuration file if there's any auth configured
            # directly as part of the Config() constructor.
            return
        config_file = self.config_file
        if not config_file:
            config_file = "~/.databrickscfg"
        config_path = pathlib.Path(config_file).expanduser()
        if not config_path.exists():
            logger.debug("%s does not exist", config_path)
            return
        ini_file = configparser.ConfigParser()
        ini_file.read(config_path)
        profile = self.profile
        has_explicit_profile = self.profile is not None
        # In Go SDK, we skip merging the profile with DEFAULT section, though Python's ConfigParser.items()
        # is returning profile key-value pairs _including those from DEFAULT_. This is not what we expect
        # from Unified Auth test suite at the moment. Hence, the private variable access.
        # See: https://docs.python.org/3/library/configparser.html#mapping-protocol-access
        if not has_explicit_profile and not ini_file.defaults():
            logger.debug(f'{config_path} has no DEFAULT profile configured')
            return
        if not has_explicit_profile:
            profile = "DEFAULT"
        profiles = ini_file._sections
        if ini_file.defaults():
            profiles['DEFAULT'] = ini_file.defaults()
        if profile not in profiles:
            raise ValueError(f'resolve: {config_path} has no {profile} profile configured')
        raw_config = profiles[profile]
        logger.info(f'loading {profile} profile from {config_file}: {", ".join(raw_config.keys())}')
        for k, v in raw_config.items():
            if k in self._inner:
                # don't overwrite a value previously set
                continue
            self.__setattr__(k, v)
- def _validate(self):
1003
- auths_used = set()
1004
- for attr in Config.attributes():
1005
- if attr.name not in self._inner:
1006
- continue
1007
- if not attr.auth:
1008
- continue
1009
- auths_used.add(attr.auth)
1010
- if len(auths_used) <= 1:
1011
- return
1012
- if self.auth_type:
1013
- # client has auth preference set
1014
- return
1015
- names = " and ".join(sorted(auths_used))
1016
- raise ValueError(f'validate: more than one authorization method configured: {names}')
1017
-
1018
    def _init_auth(self):
        """Resolve the credentials provider into a header factory.

        :raises ValueError: when the provider cannot be configured; the
            winning auth type name is prefixed to the error message.
        """
        try:
            self._header_factory = self._credentials_provider(self)
            # record which auth strategy won, used e.g. in the User-Agent header
            self.auth_type = self._credentials_provider.auth_type()
            if not self._header_factory:
                raise ValueError('not configured')
        except ValueError as e:
            raise ValueError(f'{self._credentials_provider.auth_type()} auth: {e}') from e
    def __repr__(self):
        # reuse debug_string() so sensitive values stay masked in repr output
        return f'<{self.debug_string()}>'
    def copy(self):
        """Creates a copy of the config object.

        All the copies share most of their internal state (i.e. shared references
        to fields such as credential_provider). Copies have their own instances
        of the following fields:
        - `_user_agent_other_info`
        """
        cpy: Config = copy.copy(self)
        # deep-copy so with_user_agent_extra() on the copy won't mutate the original
        cpy._user_agent_other_info = copy.deepcopy(self._user_agent_other_info)
        return cpy
20
 
1041
21
  class ApiClient:
1042
22
  _cfg: Config
@@ -1143,7 +123,8 @@ class ApiClient:
1143
123
  headers = {}
1144
124
  headers['User-Agent'] = self._user_agent_base
1145
125
  retryable = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
1146
- is_retryable=self._is_retryable)
126
+ is_retryable=self._is_retryable,
127
+ clock=self._cfg.clock)
1147
128
  return retryable(self._perform)(method,
1148
129
  path,
1149
130
  query=query,