databricks-sdk 0.17.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- databricks/sdk/__init__.py +41 -5
- databricks/sdk/azure.py +17 -7
- databricks/sdk/clock.py +49 -0
- databricks/sdk/config.py +459 -0
- databricks/sdk/core.py +7 -1026
- databricks/sdk/credentials_provider.py +628 -0
- databricks/sdk/environments.py +72 -0
- databricks/sdk/errors/__init__.py +1 -1
- databricks/sdk/errors/mapper.py +5 -5
- databricks/sdk/mixins/workspace.py +3 -3
- databricks/sdk/oauth.py +2 -1
- databricks/sdk/retries.py +9 -5
- databricks/sdk/service/_internal.py +1 -1
- databricks/sdk/service/catalog.py +946 -82
- databricks/sdk/service/compute.py +106 -41
- databricks/sdk/service/files.py +145 -31
- databricks/sdk/service/iam.py +44 -40
- databricks/sdk/service/jobs.py +199 -20
- databricks/sdk/service/ml.py +33 -42
- databricks/sdk/service/oauth2.py +3 -4
- databricks/sdk/service/pipelines.py +51 -31
- databricks/sdk/service/serving.py +1 -2
- databricks/sdk/service/settings.py +377 -72
- databricks/sdk/service/sharing.py +3 -4
- databricks/sdk/service/sql.py +27 -19
- databricks/sdk/service/vectorsearch.py +13 -17
- databricks/sdk/service/workspace.py +20 -11
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/METADATA +4 -4
- databricks_sdk-0.19.0.dist-info/RECORD +53 -0
- databricks_sdk-0.17.0.dist-info/RECORD +0 -49
- /databricks/sdk/errors/{mapping.py → platform.py} +0 -0
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/LICENSE +0 -0
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/NOTICE +0 -0
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/WHEEL +0 -0
- {databricks_sdk-0.17.0.dist-info → databricks_sdk-0.19.0.dist-info}/top_level.txt +0 -0
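The headline change is the breakup of core.py (+7 −1026) into the new config.py, credentials_provider.py, environments.py, and clock.py modules. A hedged sketch of the import layout implied by the file list above (core.py shrinking to a thin shim suggests compatibility re-exports remain, but that is an assumption):

# 0.17.0: Config and CredentialsProvider lived in databricks.sdk.core.
# 0.19.0 module layout, per the file list above:
from databricks.sdk.config import Config
from databricks.sdk.credentials_provider import CredentialsProvider
from databricks.sdk.clock import Clock, RealClock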
databricks/sdk/__init__.py
CHANGED
@@ -1,5 +1,7 @@
 import databricks.sdk.core as client
 import databricks.sdk.dbutils as dbutils
+from databricks.sdk import azure
+from databricks.sdk.credentials_provider import CredentialsProvider
 from databricks.sdk.mixins.compute import ClustersExt
 from databricks.sdk.mixins.files import DbfsExt
 from databricks.sdk.mixins.workspace import WorkspaceExt
@@ -11,8 +13,8 @@ from databricks.sdk.service.catalog import (AccountMetastoreAssignmentsAPI,
                                             ArtifactAllowlistsAPI, CatalogsAPI,
                                             ConnectionsAPI,
                                             ExternalLocationsAPI, FunctionsAPI,
-                                            GrantsAPI, MetastoresAPI,
-                                            ModelVersionsAPI,
+                                            GrantsAPI, LakehouseMonitorsAPI,
+                                            MetastoresAPI, ModelVersionsAPI,
                                             RegisteredModelsAPI, SchemasAPI,
                                             StorageCredentialsAPI,
                                             SystemSchemasAPI,
@@ -45,7 +47,7 @@ from databricks.sdk.service.provisioning import (CredentialsAPI,
                                                  EncryptionKeysAPI,
                                                  NetworksAPI, PrivateAccessAPI,
                                                  StorageAPI, VpcEndpointsAPI,
-                                                 WorkspacesAPI)
+                                                 Workspace, WorkspacesAPI)
 from databricks.sdk.service.serving import AppsAPI, ServingEndpointsAPI
 from databricks.sdk.service.settings import (AccountIpAccessListsAPI,
                                              AccountSettingsAPI,
@@ -114,7 +116,7 @@ class WorkspaceClient:
                  debug_headers: bool = None,
                  product="unknown",
                  product_version="0.0.0",
-                 credentials_provider: client.CredentialsProvider = None,
+                 credentials_provider: CredentialsProvider = None,
                  config: client.Config = None):
         if not config:
             config = client.Config(host=host,
@@ -172,6 +174,7 @@ class WorkspaceClient:
         self._instance_profiles = InstanceProfilesAPI(self._api_client)
         self._ip_access_lists = IpAccessListsAPI(self._api_client)
         self._jobs = JobsAPI(self._api_client)
+        self._lakehouse_monitors = LakehouseMonitorsAPI(self._api_client)
         self._lakeview = LakeviewAPI(self._api_client)
         self._libraries = LibrariesAPI(self._api_client)
         self._metastores = MetastoresAPI(self._api_client)
@@ -367,6 +370,11 @@ class WorkspaceClient:
         """The Jobs API allows you to create, edit, and delete jobs."""
         return self._jobs
 
+    @property
+    def lakehouse_monitors(self) -> LakehouseMonitorsAPI:
+        """A monitor computes and monitors data or model quality metrics for a table over time."""
+        return self._lakehouse_monitors
+
     @property
     def lakeview(self) -> LakeviewAPI:
         """These APIs provide specific management operations for Lakeview dashboards."""
@@ -585,7 +593,7 @@ class AccountClient:
                  debug_headers: bool = None,
                  product="unknown",
                  product_version="0.0.0",
-                 credentials_provider: client.CredentialsProvider = None,
+                 credentials_provider: CredentialsProvider = None,
                  config: client.Config = None):
         if not config:
             config = client.Config(host=host,
@@ -772,5 +780,33 @@ class AccountClient:
         """These APIs manage workspaces for this account."""
         return self._workspaces
 
+    def get_workspace_client(self, workspace: Workspace) -> WorkspaceClient:
+        """Constructs a ``WorkspaceClient`` for the given workspace.
+
+        Returns a ``WorkspaceClient`` that is configured to use the same
+        credentials as this ``AccountClient``. The underlying config is
+        copied from this ``AccountClient``, but the ``host`` and
+        ``azure_workspace_resource_id`` are overridden to match the
+        given workspace, and the ``account_id`` field is cleared.
+
+        Usage:
+
+        .. code-block::
+
+            wss = list(a.workspaces.list())
+            if len(wss) == 0:
+                pytest.skip("no workspaces")
+            w = a.get_workspace_client(wss[0])
+            assert w.current_user.me().active
+
+        :param workspace: The workspace to construct a client for.
+        :return: A ``WorkspaceClient`` for the given workspace.
+        """
+        config = self._config.copy()
+        config.host = config.environment.deployment_url(workspace.deployment_name)
+        config.azure_workspace_resource_id = azure.get_azure_resource_id(workspace)
+        config.account_id = None
+        return WorkspaceClient(config=config)
+
     def __repr__(self):
         return f"AccountClient(account_id='{self._config.account_id}', auth_type='{self._config.auth_type}', ...)"
databricks/sdk/azure.py
CHANGED
@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from typing import Dict
 
 from .oauth import TokenSource
+from .service.provisioning import Workspace
 
 
 @dataclass
@@ -15,19 +16,15 @@ class AzureEnvironment:
 ARM_DATABRICKS_RESOURCE_ID = "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d"
 
 ENVIRONMENTS = dict(
-    PUBLIC=AzureEnvironment(name="AzurePublicCloud",
+    PUBLIC=AzureEnvironment(name="PUBLIC",
                             service_management_endpoint="https://management.core.windows.net/",
                             resource_manager_endpoint="https://management.azure.com/",
                             active_directory_endpoint="https://login.microsoftonline.com/"),
-    GERMANY=AzureEnvironment(name="AzureGermanCloud",
-                             service_management_endpoint="https://management.core.cloudapi.de/",
-                             resource_manager_endpoint="https://management.microsoftazure.de/",
-                             active_directory_endpoint="https://login.microsoftonline.de/"),
-    USGOVERNMENT=AzureEnvironment(name="AzureUSGovernmentCloud",
+    USGOVERNMENT=AzureEnvironment(name="USGOVERNMENT",
                                   service_management_endpoint="https://management.core.usgovcloudapi.net/",
                                   resource_manager_endpoint="https://management.usgovcloudapi.net/",
                                   active_directory_endpoint="https://login.microsoftonline.us/"),
-    CHINA=AzureEnvironment(name="AzureChinaCloud",
+    CHINA=AzureEnvironment(name="CHINA",
                            service_management_endpoint="https://management.core.chinacloudapi.cn/",
                            resource_manager_endpoint="https://management.chinacloudapi.cn/",
                            active_directory_endpoint="https://login.chinacloudapi.cn/"),
@@ -42,3 +39,16 @@ def add_workspace_id_header(cfg: 'Config', headers: Dict[str, str]):
 def add_sp_management_token(token_source: 'TokenSource', headers: Dict[str, str]):
     mgmt_token = token_source.token()
     headers['X-Databricks-Azure-SP-Management-Token'] = mgmt_token.access_token
+
+
+def get_azure_resource_id(workspace: Workspace):
+    """
+    Returns the Azure Resource ID for the given workspace, if it is an Azure workspace.
+    :param workspace:
+    :return:
+    """
+    if workspace.azure_workspace_info is None:
+        return None
+    return (f'/subscriptions/{workspace.azure_workspace_info.subscription_id}'
+            f'/resourceGroups/{workspace.azure_workspace_info.resource_group}'
+            f'/providers/Microsoft.Databricks/workspaces/{workspace.workspace_name}')
databricks/sdk/clock.py
ADDED
@@ -0,0 +1,49 @@
+import abc
+import time
+
+
+class Clock(metaclass=abc.ABCMeta):
+
+    @abc.abstractmethod
+    def time(self) -> float:
+        """
+        Return the current time in seconds since the Epoch.
+        Fractions of a second may be present if the system clock provides them.
+
+        :return: The current time in seconds since the Epoch.
+        """
+
+    @abc.abstractmethod
+    def sleep(self, seconds: float) -> None:
+        """
+        Delay execution for a given number of seconds. The argument may be
+        a floating point number for subsecond precision.
+
+        :param seconds: The duration to sleep in seconds.
+        :return:
+        """
+
+
+class RealClock(Clock):
+    """
+    A real clock that uses the ``time`` module to get the current time and sleep.
+    """
+
+    def time(self) -> float:
+        """
+        Return the current time in seconds since the Epoch.
+        Fractions of a second may be present if the system clock provides them.
+
+        :return: The current time in seconds since the Epoch.
+        """
+        return time.time()
+
+    def sleep(self, seconds: float) -> None:
+        """
+        Delay execution for a given number of seconds. The argument may be
+        a floating point number for subsecond precision.
+
+        :param seconds: The duration to sleep in seconds.
+        :return:
+        """
+        time.sleep(seconds)
databricks/sdk/config.py
ADDED
@@ -0,0 +1,459 @@
+import configparser
+import copy
+import logging
+import os
+import pathlib
+import platform
+import sys
+import urllib.parse
+from typing import Dict, Iterable, Optional
+
+import requests
+
+from .azure import AzureEnvironment
+from .clock import Clock, RealClock
+from .credentials_provider import CredentialsProvider, DefaultCredentials
+from .environments import (ALL_ENVS, DEFAULT_ENVIRONMENT, Cloud,
+                           DatabricksEnvironment)
+from .oauth import OidcEndpoints
+from .version import __version__
+
+logger = logging.getLogger('databricks.sdk')
+
+
+class ConfigAttribute:
+    """ Configuration attribute metadata and descriptor protocols. """
+
+    # name and transform are discovered from Config.__new__
+    name: str = None
+    transform: type = str
+
+    def __init__(self, env: str = None, auth: str = None, sensitive: bool = False):
+        self.env = env
+        self.auth = auth
+        self.sensitive = sensitive
+
+    def __get__(self, cfg: 'Config', owner):
+        if not cfg:
+            return None
+        return cfg._inner.get(self.name, None)
+
+    def __set__(self, cfg: 'Config', value: any):
+        cfg._inner[self.name] = self.transform(value)
+
+    def __repr__(self) -> str:
+        return f"<ConfigAttribute '{self.name}' {self.transform.__name__}>"
+
+
+class Config:
+    host: str = ConfigAttribute(env='DATABRICKS_HOST')
+    account_id: str = ConfigAttribute(env='DATABRICKS_ACCOUNT_ID')
+    token: str = ConfigAttribute(env='DATABRICKS_TOKEN', auth='pat', sensitive=True)
+    username: str = ConfigAttribute(env='DATABRICKS_USERNAME', auth='basic')
+    password: str = ConfigAttribute(env='DATABRICKS_PASSWORD', auth='basic', sensitive=True)
+    client_id: str = ConfigAttribute(env='DATABRICKS_CLIENT_ID', auth='oauth')
+    client_secret: str = ConfigAttribute(env='DATABRICKS_CLIENT_SECRET', auth='oauth', sensitive=True)
+    profile: str = ConfigAttribute(env='DATABRICKS_CONFIG_PROFILE')
+    config_file: str = ConfigAttribute(env='DATABRICKS_CONFIG_FILE')
+    google_service_account: str = ConfigAttribute(env='DATABRICKS_GOOGLE_SERVICE_ACCOUNT', auth='google')
+    google_credentials: str = ConfigAttribute(env='GOOGLE_CREDENTIALS', auth='google', sensitive=True)
+    azure_workspace_resource_id: str = ConfigAttribute(env='DATABRICKS_AZURE_RESOURCE_ID', auth='azure')
+    azure_use_msi: bool = ConfigAttribute(env='ARM_USE_MSI', auth='azure')
+    azure_client_secret: str = ConfigAttribute(env='ARM_CLIENT_SECRET', auth='azure', sensitive=True)
+    azure_client_id: str = ConfigAttribute(env='ARM_CLIENT_ID', auth='azure')
+    azure_tenant_id: str = ConfigAttribute(env='ARM_TENANT_ID', auth='azure')
+    azure_environment: str = ConfigAttribute(env='ARM_ENVIRONMENT')
+    databricks_cli_path: str = ConfigAttribute(env='DATABRICKS_CLI_PATH')
+    auth_type: str = ConfigAttribute(env='DATABRICKS_AUTH_TYPE')
+    cluster_id: str = ConfigAttribute(env='DATABRICKS_CLUSTER_ID')
+    warehouse_id: str = ConfigAttribute(env='DATABRICKS_WAREHOUSE_ID')
+    skip_verify: bool = ConfigAttribute()
+    http_timeout_seconds: float = ConfigAttribute()
+    debug_truncate_bytes: int = ConfigAttribute(env='DATABRICKS_DEBUG_TRUNCATE_BYTES')
+    debug_headers: bool = ConfigAttribute(env='DATABRICKS_DEBUG_HEADERS')
+    rate_limit: int = ConfigAttribute(env='DATABRICKS_RATE_LIMIT')
+    retry_timeout_seconds: int = ConfigAttribute()
+    metadata_service_url = ConfigAttribute(env='DATABRICKS_METADATA_SERVICE_URL',
+                                           auth='metadata-service',
+                                           sensitive=True)
+    max_connection_pools: int = ConfigAttribute()
+    max_connections_per_pool: int = ConfigAttribute()
+    databricks_environment: Optional[DatabricksEnvironment] = None
+
+    def __init__(self,
+                 *,
+                 credentials_provider: CredentialsProvider = None,
+                 product="unknown",
+                 product_version="0.0.0",
+                 clock: Clock = None,
+                 **kwargs):
+        self._inner = {}
+        self._user_agent_other_info = []
+        self._credentials_provider = credentials_provider if credentials_provider else DefaultCredentials()
+        if 'databricks_environment' in kwargs:
+            self.databricks_environment = kwargs['databricks_environment']
+            del kwargs['databricks_environment']
+        self._clock = clock if clock is not None else RealClock()
+        try:
+            self._set_inner_config(kwargs)
+            self._load_from_env()
+            self._known_file_config_loader()
+            self._fix_host_if_needed()
+            self._validate()
+            self._init_auth()
+            self._product = product
+            self._product_version = product_version
+        except ValueError as e:
+            message = self.wrap_debug_info(str(e))
+            raise ValueError(message) from e
+
+    def wrap_debug_info(self, message: str) -> str:
+        debug_string = self.debug_string()
+        if debug_string:
+            message = f'{message.rstrip(".")}. {debug_string}'
+        return message
+
+    @staticmethod
+    def parse_dsn(dsn: str) -> 'Config':
+        uri = urllib.parse.urlparse(dsn)
+        if uri.scheme != 'databricks':
+            raise ValueError(f'Expected databricks:// scheme, got {uri.scheme}://')
+        kwargs = {'host': f'https://{uri.hostname}'}
+        if uri.username:
+            kwargs['username'] = uri.username
+        if uri.password:
+            kwargs['password'] = uri.password
+        query = dict(urllib.parse.parse_qsl(uri.query))
+        for attr in Config.attributes():
+            if attr.name not in query:
+                continue
+            kwargs[attr.name] = query[attr.name]
+        return Config(**kwargs)
+
+    def authenticate(self) -> Dict[str, str]:
+        """ Returns a list of fresh authentication headers """
+        return self._header_factory()
+
+    def as_dict(self) -> dict:
+        return self._inner
+
+    def _get_azure_environment_name(self) -> str:
+        if not self.azure_environment:
+            return "PUBLIC"
+        env = self.azure_environment.upper()
+        # Compatibility with older versions of the SDK that allowed users to specify AzurePublicCloud or AzureChinaCloud
+        if env.startswith("AZURE"):
+            env = env[len("AZURE"):]
+        if env.endswith("CLOUD"):
+            env = env[:-len("CLOUD")]
+        return env
+
+    @property
+    def environment(self) -> DatabricksEnvironment:
+        """Returns the environment based on configuration."""
+        if self.databricks_environment:
+            return self.databricks_environment
+        if self.host:
+            for environment in ALL_ENVS:
+                if self.host.endswith(environment.dns_zone):
+                    return environment
+        if self.azure_workspace_resource_id:
+            azure_env = self._get_azure_environment_name()
+            for environment in ALL_ENVS:
+                if environment.cloud != Cloud.AZURE:
+                    continue
+                if environment.azure_environment.name != azure_env:
+                    continue
+                if environment.dns_zone.startswith(".dev") or environment.dns_zone.startswith(".staging"):
+                    continue
+                return environment
+        return DEFAULT_ENVIRONMENT
+
+    @property
+    def is_azure(self) -> bool:
+        return self.environment.cloud == Cloud.AZURE
+
+    @property
+    def is_gcp(self) -> bool:
+        return self.environment.cloud == Cloud.GCP
+
+    @property
+    def is_aws(self) -> bool:
+        return self.environment.cloud == Cloud.AWS
+
+    @property
+    def is_account_client(self) -> bool:
+        if not self.host:
+            return False
+        return self.host.startswith("https://accounts.") or self.host.startswith("https://accounts-dod.")
+
+    @property
+    def arm_environment(self) -> AzureEnvironment:
+        return self.environment.azure_environment
+
+    @property
+    def effective_azure_login_app_id(self):
+        return self.environment.azure_application_id
+
+    @property
+    def hostname(self) -> str:
+        url = urllib.parse.urlparse(self.host)
+        return url.netloc
+
+    @property
+    def is_any_auth_configured(self) -> bool:
+        for attr in Config.attributes():
+            if not attr.auth:
+                continue
+            value = self._inner.get(attr.name, None)
+            if value:
+                return True
+        return False
+
+    @property
+    def user_agent(self):
+        """ Returns User-Agent header used by this SDK """
+        py_version = platform.python_version()
+        os_name = platform.uname().system.lower()
+
+        ua = [
+            f"{self._product}/{self._product_version}", f"databricks-sdk-py/{__version__}",
+            f"python/{py_version}", f"os/{os_name}", f"auth/{self.auth_type}",
+        ]
+        if len(self._user_agent_other_info) > 0:
+            ua.append(' '.join(self._user_agent_other_info))
+        if len(self._upstream_user_agent) > 0:
+            ua.append(self._upstream_user_agent)
+        if 'DATABRICKS_RUNTIME_VERSION' in os.environ:
+            runtime_version = os.environ['DATABRICKS_RUNTIME_VERSION']
+            if runtime_version != '':
+                runtime_version = self._sanitize_header_value(runtime_version)
+                ua.append(f'runtime/{runtime_version}')
+
+        return ' '.join(ua)
+
+    @staticmethod
+    def _sanitize_header_value(value: str) -> str:
+        value = value.replace(' ', '-')
+        value = value.replace('/', '-')
+        return value
+
+    @property
+    def _upstream_user_agent(self) -> str:
+        product = os.environ.get('DATABRICKS_SDK_UPSTREAM', None)
+        product_version = os.environ.get('DATABRICKS_SDK_UPSTREAM_VERSION', None)
+        if product is not None and product_version is not None:
+            return f"upstream/{product} upstream-version/{product_version}"
+        return ""
+
+    def with_user_agent_extra(self, key: str, value: str) -> 'Config':
+        self._user_agent_other_info.append(f"{key}/{value}")
+        return self
+
+    @property
+    def oidc_endpoints(self) -> Optional[OidcEndpoints]:
+        self._fix_host_if_needed()
+        if not self.host:
+            return None
+        if self.is_azure and self.azure_client_id:
+            # Retrieve authorize endpoint to retrieve token endpoint after
+            res = requests.get(f'{self.host}/oidc/oauth2/v2.0/authorize', allow_redirects=False)
+            real_auth_url = res.headers.get('location')
+            if not real_auth_url:
+                return None
+            return OidcEndpoints(authorization_endpoint=real_auth_url,
+                                 token_endpoint=real_auth_url.replace('/authorize', '/token'))
+        if self.is_account_client and self.account_id:
+            prefix = f'{self.host}/oidc/accounts/{self.account_id}'
+            return OidcEndpoints(authorization_endpoint=f'{prefix}/v1/authorize',
+                                 token_endpoint=f'{prefix}/v1/token')
+        oidc = f'{self.host}/oidc/.well-known/oauth-authorization-server'
+        res = requests.get(oidc)
+        if res.status_code != 200:
+            return None
+        auth_metadata = res.json()
+        return OidcEndpoints(authorization_endpoint=auth_metadata.get('authorization_endpoint'),
+                             token_endpoint=auth_metadata.get('token_endpoint'))
+
+    def debug_string(self) -> str:
+        """ Returns log-friendly representation of configured attributes """
+        buf = []
+        attrs_used = []
+        envs_used = []
+        for attr in Config.attributes():
+            if attr.env and os.environ.get(attr.env):
+                envs_used.append(attr.env)
+            value = getattr(self, attr.name)
+            if not value:
+                continue
+            safe = '***' if attr.sensitive else f'{value}'
+            attrs_used.append(f'{attr.name}={safe}')
+        if attrs_used:
+            buf.append(f'Config: {", ".join(attrs_used)}')
+        if envs_used:
+            buf.append(f'Env: {", ".join(envs_used)}')
+        return '. '.join(buf)
+
+    def to_dict(self) -> Dict[str, any]:
+        return self._inner
+
+    @property
+    def sql_http_path(self) -> Optional[str]:
+        """(Experimental) Return HTTP path for SQL Drivers.
+
+        If `cluster_id` or `warehouse_id` are configured, return a valid HTTP Path argument
+        used in construction of JDBC/ODBC DSN string.
+
+        See https://docs.databricks.com/integrations/jdbc-odbc-bi.html
+        """
+        if (not self.cluster_id) and (not self.warehouse_id):
+            return None
+        if self.cluster_id and self.warehouse_id:
+            raise ValueError('cannot have both cluster_id and warehouse_id')
+        headers = self.authenticate()
+        headers['User-Agent'] = f'{self.user_agent} sdk-feature/sql-http-path'
+        if self.cluster_id:
+            response = requests.get(f"{self.host}/api/2.0/preview/scim/v2/Me", headers=headers)
+            # get workspace ID from the response header
+            workspace_id = response.headers.get('x-databricks-org-id')
+            return f'sql/protocolv1/o/{workspace_id}/{self.cluster_id}'
+        if self.warehouse_id:
+            return f'/sql/1.0/warehouses/{self.warehouse_id}'
+
+    @property
+    def clock(self) -> Clock:
+        return self._clock
+
+    @classmethod
+    def attributes(cls) -> Iterable[ConfigAttribute]:
+        """ Returns a list of Databricks SDK configuration metadata """
+        if hasattr(cls, '_attributes'):
+            return cls._attributes
+        if sys.version_info[1] >= 10:
+            import inspect
+            anno = inspect.get_annotations(cls)
+        else:
+            # Python 3.7 compatibility: getting type hints require extra hop, as described in
+            # "Accessing The Annotations Dict Of An Object In Python 3.9 And Older" section of
+            # https://docs.python.org/3/howto/annotations.html
+            anno = cls.__dict__['__annotations__']
+        attrs = []
+        for name, v in cls.__dict__.items():
+            if type(v) != ConfigAttribute:
+                continue
+            v.name = name
+            v.transform = anno.get(name, str)
+            attrs.append(v)
+        cls._attributes = attrs
+        return cls._attributes
+
+    def _fix_host_if_needed(self):
+        if not self.host:
+            return
+        # fix url to remove trailing slash
+        o = urllib.parse.urlparse(self.host)
+        if not o.hostname:
+            # only hostname is specified
+            self.host = f"https://{self.host}"
+        else:
+            self.host = f"{o.scheme}://{o.netloc}"
+
+    def _set_inner_config(self, keyword_args: Dict[str, any]):
+        for attr in self.attributes():
+            if attr.name not in keyword_args:
+                continue
+            if keyword_args.get(attr.name, None) is None:
+                continue
+            self.__setattr__(attr.name, keyword_args[attr.name])
+
+    def _load_from_env(self):
+        found = False
+        for attr in self.attributes():
+            if not attr.env:
+                continue
+            if attr.name in self._inner:
+                continue
+            value = os.environ.get(attr.env)
+            if not value:
+                continue
+            self.__setattr__(attr.name, value)
+            found = True
+        if found:
+            logger.debug('Loaded from environment')
+
+    def _known_file_config_loader(self):
+        if not self.profile and (self.is_any_auth_configured or self.host
+                                 or self.azure_workspace_resource_id):
+            # skip loading configuration file if there's any auth configured
+            # directly as part of the Config() constructor.
+            return
+        config_file = self.config_file
+        if not config_file:
+            config_file = "~/.databrickscfg"
+        config_path = pathlib.Path(config_file).expanduser()
+        if not config_path.exists():
+            logger.debug("%s does not exist", config_path)
+            return
+        ini_file = configparser.ConfigParser()
+        ini_file.read(config_path)
+        profile = self.profile
+        has_explicit_profile = self.profile is not None
+        # In Go SDK, we skip merging the profile with DEFAULT section, though Python's ConfigParser.items()
+        # is returning profile key-value pairs _including those from DEFAULT_. This is not what we expect
+        # from Unified Auth test suite at the moment. Hence, the private variable access.
+        # See: https://docs.python.org/3/library/configparser.html#mapping-protocol-access
+        if not has_explicit_profile and not ini_file.defaults():
+            logger.debug(f'{config_path} has no DEFAULT profile configured')
+            return
+        if not has_explicit_profile:
+            profile = "DEFAULT"
+        profiles = ini_file._sections
+        if ini_file.defaults():
+            profiles['DEFAULT'] = ini_file.defaults()
+        if profile not in profiles:
+            raise ValueError(f'resolve: {config_path} has no {profile} profile configured')
+        raw_config = profiles[profile]
+        logger.info(f'loading {profile} profile from {config_file}: {", ".join(raw_config.keys())}')
+        for k, v in raw_config.items():
+            if k in self._inner:
+                # don't overwrite a value previously set
+                continue
+            self.__setattr__(k, v)
+
+    def _validate(self):
+        auths_used = set()
+        for attr in Config.attributes():
+            if attr.name not in self._inner:
+                continue
+            if not attr.auth:
+                continue
+            auths_used.add(attr.auth)
+        if len(auths_used) <= 1:
+            return
+        if self.auth_type:
+            # client has auth preference set
+            return
+        names = " and ".join(sorted(auths_used))
+        raise ValueError(f'validate: more than one authorization method configured: {names}')
+
+    def _init_auth(self):
+        try:
+            self._header_factory = self._credentials_provider(self)
+            self.auth_type = self._credentials_provider.auth_type()
+            if not self._header_factory:
+                raise ValueError('not configured')
+        except ValueError as e:
+            raise ValueError(f'{self._credentials_provider.auth_type()} auth: {e}') from e
+
+    def __repr__(self):
+        return f'<{self.debug_string()}>'
+
+    def copy(self):
+        """Creates a copy of the config object.
+        All the copies share most of their internal state (ie, shared reference to fields such as credential_provider).
+        Copies have their own instances of the following fields
+        - `_user_agent_other_info`
+        """
+        cpy: Config = copy.copy(self)
+        cpy._user_agent_other_info = copy.deepcopy(self._user_agent_other_info)
+        return cpy