databricks-sql-connector 4.1.3__tar.gz → 4.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/CHANGELOG.md +10 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/PKG-INFO +7 -1
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/README.md +6 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/pyproject.toml +2 -1
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/__init__.py +4 -1
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/auth.py +22 -6
- databricks_sql_connector-4.2.0/src/databricks/sql/auth/auth_utils.py +64 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/common.py +2 -0
- databricks_sql_connector-4.2.0/src/databricks/sql/auth/token_federation.py +206 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/client.py +315 -9
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/common/unified_http_client.py +5 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/exc.py +17 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/session.py +21 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/telemetry/models/event.py +108 -1
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/telemetry/telemetry_client.py +1 -1
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/LICENSE +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/__init__.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/__init__.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/authenticators.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/endpoint.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/oauth.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/retry.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/thrift_http_client.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/databricks_client.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/backend.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/models/__init__.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/models/base.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/models/requests.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/models/responses.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/queue.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/result_set.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/utils/constants.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/utils/conversion.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/utils/filters.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/utils/http_client.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/sea/utils/normalize.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/thrift_backend.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/types.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/utils/__init__.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/backend/utils/guid_utils.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/cloudfetch/downloader.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/common/feature_flag.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/common/http.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/common/http_utils.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/experimental/__init__.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/parameters/__init__.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/parameters/native.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/parameters/py.typed +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/py.typed +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/result_set.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/telemetry/latency_logger.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/telemetry/models/endpoint_models.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/telemetry/models/enums.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/telemetry/models/frontend_logs.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/telemetry/utils.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/thrift_api/__init__.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/types.py +0 -0
- {databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/utils.py +0 -0
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Release History
|
|
2
2
|
|
|
3
|
+
# 4.2.0 (2025-11-14)
|
|
4
|
+
- Add multi-statement transaction support (databricks/databricks-sql-python#704 by @jayantsing-db)
|
|
5
|
+
- Add a workflow to parallelise the E2E tests (databricks/databricks-sql-python#697 by @msrathore-db)
|
|
6
|
+
- Bring Python telemetry event model consistent with JDBC (databricks/databricks-sql-python#701 by @nikhilsuri-db)
|
|
7
|
+
|
|
8
|
+
# 4.1.4 (2025-10-15)
|
|
9
|
+
- Add support for Token Federation (databricks/databricks-sql-python#691 by @madhav-db)
|
|
10
|
+
- Add metric view support (databricks/databricks-sql-python#688 by @shivam2680)
|
|
11
|
+
- Increased time limit for long running queries (databricks/databricks-sql-python#686 by @jprakash-db)
|
|
12
|
+
|
|
3
13
|
# 4.1.3 (2025-09-17)
|
|
4
14
|
- Query tags integration (databricks/databricks-sql-python#663 by @sreekanth-db)
|
|
5
15
|
- Add variant support (databricks/databricks-sql-python#560 by @shivam2680)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: databricks-sql-connector
|
|
3
|
-
Version: 4.1.3
|
|
3
|
+
Version: 4.2.0
|
|
4
4
|
Summary: Databricks SQL Connector for Python
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -102,6 +102,12 @@ or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/123456789
|
|
|
102
102
|
> to authenticate the target Databricks user account and needs to open the browser for authentication. So it
|
|
103
103
|
> can only run on the user's machine.
|
|
104
104
|
|
|
105
|
+
## Transaction Support
|
|
106
|
+
|
|
107
|
+
The connector supports multi-statement transactions with manual commit/rollback control. Set `connection.autocommit = False` to disable autocommit mode, then use `connection.commit()` and `connection.rollback()` to control transactions.
|
|
108
|
+
|
|
109
|
+
For detailed documentation, examples, and best practices, see **[TRANSACTIONS.md](TRANSACTIONS.md)**.
|
|
110
|
+
|
|
105
111
|
## SQLAlchemy
|
|
106
112
|
Starting from `databricks-sql-connector` version 4.0.0 SQLAlchemy support has been extracted to a new library `databricks-sqlalchemy`.
|
|
107
113
|
|
|
@@ -67,6 +67,12 @@ or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/123456789
|
|
|
67
67
|
> to authenticate the target Databricks user account and needs to open the browser for authentication. So it
|
|
68
68
|
> can only run on the user's machine.
|
|
69
69
|
|
|
70
|
+
## Transaction Support
|
|
71
|
+
|
|
72
|
+
The connector supports multi-statement transactions with manual commit/rollback control. Set `connection.autocommit = False` to disable autocommit mode, then use `connection.commit()` and `connection.rollback()` to control transactions.
|
|
73
|
+
|
|
74
|
+
For detailed documentation, examples, and best practices, see **[TRANSACTIONS.md](TRANSACTIONS.md)**.
|
|
75
|
+
|
|
70
76
|
## SQLAlchemy
|
|
71
77
|
Starting from `databricks-sql-connector` version 4.0.0 SQLAlchemy support has been extracted to a new library `databricks-sqlalchemy`.
|
|
72
78
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "databricks-sql-connector"
|
|
3
|
-
version = "4.1.3"
|
|
3
|
+
version = "4.2.0"
|
|
4
4
|
description = "Databricks SQL Connector for Python"
|
|
5
5
|
authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -39,6 +39,7 @@ pylint = ">=2.12.0"
|
|
|
39
39
|
black = "^22.3.0"
|
|
40
40
|
pytest-dotenv = "^0.5.2"
|
|
41
41
|
pytest-cov = "^4.0.0"
|
|
42
|
+
pytest-xdist = "^3.0.0"
|
|
42
43
|
numpy = [
|
|
43
44
|
{ version = ">=1.16.6", python = ">=3.8,<3.11" },
|
|
44
45
|
{ version = ">=1.23.4", python = ">=3.11" },
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/__init__.py
RENAMED
|
@@ -8,6 +8,9 @@ threadsafety = 1 # Threads may share the module, but not connections.
|
|
|
8
8
|
|
|
9
9
|
paramstyle = "named"
|
|
10
10
|
|
|
11
|
+
# Transaction isolation level constants (extension to PEP 249)
|
|
12
|
+
TRANSACTION_ISOLATION_LEVEL_REPEATABLE_READ = "REPEATABLE_READ"
|
|
13
|
+
|
|
11
14
|
import re
|
|
12
15
|
|
|
13
16
|
from typing import TYPE_CHECKING
|
|
@@ -68,7 +71,7 @@ DATETIME = DBAPITypeObject("timestamp")
|
|
|
68
71
|
DATE = DBAPITypeObject("date")
|
|
69
72
|
ROWID = DBAPITypeObject()
|
|
70
73
|
|
|
71
|
-
__version__ = "4.1.3"
|
|
74
|
+
__version__ = "4.2.0"
|
|
72
75
|
USER_AGENT_NAME = "PyDatabricksSqlConnector"
|
|
73
76
|
|
|
74
77
|
# These two functions are pyhive legacy
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/auth.py
RENAMED
|
@@ -8,13 +8,17 @@ from databricks.sql.auth.authenticators import (
|
|
|
8
8
|
AzureServicePrincipalCredentialProvider,
|
|
9
9
|
)
|
|
10
10
|
from databricks.sql.auth.common import AuthType, ClientContext
|
|
11
|
+
from databricks.sql.auth.token_federation import TokenFederationProvider
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
def get_auth_provider(cfg: ClientContext, http_client):
|
|
15
|
+
# Determine the base auth provider
|
|
16
|
+
base_provider: Optional[AuthProvider] = None
|
|
17
|
+
|
|
14
18
|
if cfg.credentials_provider:
|
|
15
|
-
|
|
19
|
+
base_provider = ExternalAuthProvider(cfg.credentials_provider)
|
|
16
20
|
elif cfg.auth_type == AuthType.AZURE_SP_M2M.value:
|
|
17
|
-
|
|
21
|
+
base_provider = ExternalAuthProvider(
|
|
18
22
|
AzureServicePrincipalCredentialProvider(
|
|
19
23
|
cfg.hostname,
|
|
20
24
|
cfg.azure_client_id,
|
|
@@ -29,7 +33,7 @@ def get_auth_provider(cfg: ClientContext, http_client):
|
|
|
29
33
|
assert cfg.oauth_client_id is not None
|
|
30
34
|
assert cfg.oauth_scopes is not None
|
|
31
35
|
|
|
32
|
-
|
|
36
|
+
base_provider = DatabricksOAuthProvider(
|
|
33
37
|
cfg.hostname,
|
|
34
38
|
cfg.oauth_persistence,
|
|
35
39
|
cfg.oauth_redirect_port_range,
|
|
@@ -39,17 +43,17 @@ def get_auth_provider(cfg: ClientContext, http_client):
|
|
|
39
43
|
cfg.auth_type,
|
|
40
44
|
)
|
|
41
45
|
elif cfg.access_token is not None:
|
|
42
|
-
|
|
46
|
+
base_provider = AccessTokenAuthProvider(cfg.access_token)
|
|
43
47
|
elif cfg.use_cert_as_auth and cfg.tls_client_cert_file:
|
|
44
48
|
# no op authenticator. authentication is performed using ssl certificate outside of headers
|
|
45
|
-
|
|
49
|
+
base_provider = AuthProvider()
|
|
46
50
|
else:
|
|
47
51
|
if (
|
|
48
52
|
cfg.oauth_redirect_port_range is not None
|
|
49
53
|
and cfg.oauth_client_id is not None
|
|
50
54
|
and cfg.oauth_scopes is not None
|
|
51
55
|
):
|
|
52
|
-
|
|
56
|
+
base_provider = DatabricksOAuthProvider(
|
|
53
57
|
cfg.hostname,
|
|
54
58
|
cfg.oauth_persistence,
|
|
55
59
|
cfg.oauth_redirect_port_range,
|
|
@@ -61,6 +65,17 @@ def get_auth_provider(cfg: ClientContext, http_client):
|
|
|
61
65
|
else:
|
|
62
66
|
raise RuntimeError("No valid authentication settings!")
|
|
63
67
|
|
|
68
|
+
# Always wrap with token federation (falls back gracefully if not needed)
|
|
69
|
+
if base_provider:
|
|
70
|
+
return TokenFederationProvider(
|
|
71
|
+
hostname=cfg.hostname,
|
|
72
|
+
external_provider=base_provider,
|
|
73
|
+
http_client=http_client,
|
|
74
|
+
identity_federation_client_id=cfg.identity_federation_client_id,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
return base_provider
|
|
78
|
+
|
|
64
79
|
|
|
65
80
|
PYSQL_OAUTH_SCOPES = ["sql", "offline_access"]
|
|
66
81
|
PYSQL_OAUTH_CLIENT_ID = "databricks-sql-python"
|
|
@@ -114,5 +129,6 @@ def get_python_sql_connector_auth_provider(hostname: str, http_client, **kwargs)
|
|
|
114
129
|
else redirect_port_range,
|
|
115
130
|
oauth_persistence=kwargs.get("experimental_oauth_persistence"),
|
|
116
131
|
credentials_provider=kwargs.get("credentials_provider"),
|
|
132
|
+
identity_federation_client_id=kwargs.get("identity_federation_client_id"),
|
|
117
133
|
)
|
|
118
134
|
return get_auth_provider(cfg, http_client)
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import jwt
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
from typing import Optional, Dict, Tuple
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_hostname(hostname: str) -> str:
|
|
11
|
+
"""
|
|
12
|
+
Normalize the hostname to include scheme and trailing slash.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
hostname: The hostname to normalize
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
Normalized hostname with scheme and trailing slash
|
|
19
|
+
"""
|
|
20
|
+
if not hostname.startswith("http://") and not hostname.startswith("https://"):
|
|
21
|
+
hostname = f"https://{hostname}"
|
|
22
|
+
if not hostname.endswith("/"):
|
|
23
|
+
hostname = f"{hostname}/"
|
|
24
|
+
return hostname
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def decode_token(access_token: str) -> Optional[Dict]:
    """
    Read the claims of a JWT without checking its signature.

    Args:
        access_token: The JWT access token whose claims should be read

    Returns:
        The decoded claims, or None when the token cannot be decoded
        (the failure is logged at debug level).
    """
    try:
        claims = jwt.decode(access_token, options={"verify_signature": False})
    except Exception as e:
        # Best-effort: any decoding problem is reported as "no claims".
        logger.debug("Failed to decode JWT token: %s", e)
        return None
    return claims
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def is_same_host(url1: str, url2: str) -> bool:
    """
    Check if two URLs have the same host.

    Only the host component is compared: scheme, userinfo, port, path and
    letter case are ignored. A URL from which no host can be extracted
    (for example an empty string or a value without a ``scheme://`` prefix)
    never matches anything.

    Args:
        url1: First URL
        url2: Second URL

    Returns:
        True if both URLs have a host and the hosts are the same,
        False otherwise (including when either URL cannot be parsed)
    """
    try:
        # ``hostname`` strips userinfo and port, unwraps IPv6 brackets and
        # lowercases the result, which a manual split on ":" does not.
        host1 = urlparse(url1).hostname
        host2 = urlparse(url2).hostname
    except Exception as e:
        # Deliberate best-effort: unparseable input means "not the same host".
        logger.debug("Failed to parse URLs: %s", e)
        return False

    # Two missing hosts must not be treated as equal.
    if not host1 or not host2:
        return False
    return host1 == host2
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/common.py
RENAMED
|
@@ -37,6 +37,7 @@ class ClientContext:
|
|
|
37
37
|
tls_client_cert_file: Optional[str] = None,
|
|
38
38
|
oauth_persistence=None,
|
|
39
39
|
credentials_provider=None,
|
|
40
|
+
identity_federation_client_id: Optional[str] = None,
|
|
40
41
|
# HTTP client configuration parameters
|
|
41
42
|
ssl_options=None, # SSLOptions type
|
|
42
43
|
socket_timeout: Optional[float] = None,
|
|
@@ -65,6 +66,7 @@ class ClientContext:
|
|
|
65
66
|
self.tls_client_cert_file = tls_client_cert_file
|
|
66
67
|
self.oauth_persistence = oauth_persistence
|
|
67
68
|
self.credentials_provider = credentials_provider
|
|
69
|
+
self.identity_federation_client_id = identity_federation_client_id
|
|
68
70
|
|
|
69
71
|
# HTTP client configuration
|
|
70
72
|
self.ssl_options = ssl_options
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import json
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
from typing import Optional, Dict, Tuple
|
|
5
|
+
from urllib.parse import urlencode
|
|
6
|
+
|
|
7
|
+
from databricks.sql.auth.authenticators import AuthProvider
|
|
8
|
+
from databricks.sql.auth.auth_utils import (
|
|
9
|
+
parse_hostname,
|
|
10
|
+
decode_token,
|
|
11
|
+
is_same_host,
|
|
12
|
+
)
|
|
13
|
+
from databricks.sql.common.http import HttpMethod
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Token:
    """
    Represents an OAuth token with expiration management.

    The expiry is computed once, at construction time, from the token's JWT
    ``exp`` claim when one can be read; otherwise a default lifetime is used.
    """

    def __init__(self, access_token: str, token_type: str = "Bearer"):
        """
        Initialize a token.

        Args:
            access_token: The access token string
            token_type: The token type (default: Bearer)
        """
        self.access_token = access_token
        self.token_type = token_type
        self.expiry_time = self._calculate_expiry()

    def _calculate_expiry(self) -> datetime:
        """
        Calculate the token expiry time from JWT claims.

        Returns:
            The ``exp`` claim as a naive local datetime minus a one-minute
            safety buffer, or one hour from now when the token has no usable
            ``exp`` claim (not a JWT, claim absent, or claim not convertible
            to a timestamp).
        """
        decoded = decode_token(self.access_token)
        if decoded and "exp" in decoded:
            try:
                # Use JWT exp claim with 1 minute buffer
                return datetime.fromtimestamp(decoded["exp"]) - timedelta(minutes=1)
            except (TypeError, ValueError, OverflowError, OSError) as e:
                # A malformed or out-of-range claim must not make the token
                # unusable; fall through to the default lifetime instead.
                logger.debug("Ignoring unusable exp claim in token: %s", e)
        # Default to 1 hour if no expiry info
        return datetime.now() + timedelta(hours=1)

    def is_expired(self) -> bool:
        """
        Check if the token is expired.

        Returns:
            True if the current local time has reached the stored expiry
            time, False otherwise
        """
        return datetime.now() >= self.expiry_time

    def to_dict(self) -> Dict[str, str]:
        """
        Convert token to dictionary format.

        Returns:
            Dictionary with access_token and token_type
        """
        return {
            "access_token": self.access_token,
            "token_type": self.token_type,
        }
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TokenFederationProvider(AuthProvider):
    """
    Token Federation support for the Databricks SQL Python driver.

    Wraps another auth provider. When that provider yields a token whose
    issuer host differs from the Databricks host, the token is exchanged for
    a Databricks token; otherwise (or if the exchange fails) the external
    token is used as-is. The resulting token is cached until it expires.
    """

    TOKEN_EXCHANGE_ENDPOINT = "/oidc/v1/token"
    TOKEN_EXCHANGE_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:token-exchange"
    TOKEN_EXCHANGE_SUBJECT_TYPE = "urn:ietf:params:oauth:token-type:jwt"

    def __init__(
        self,
        hostname: str,
        external_provider: AuthProvider,
        http_client,
        identity_federation_client_id: Optional[str] = None,
    ):
        """
        Set up the provider.

        Args:
            hostname: The Databricks workspace hostname (normalized on entry)
            external_provider: The wrapped authentication provider
            http_client: HTTP client used for the token exchange request
            identity_federation_client_id: Optional client ID sent with the
                exchange request

        Raises:
            ValueError: If http_client is falsy
        """
        if not http_client:
            raise ValueError("http_client is required for TokenFederationProvider")

        self.hostname = parse_hostname(hostname)
        self.external_provider = external_provider
        self.http_client = http_client
        self.identity_federation_client_id = identity_federation_client_id

        # Last resolved token, and the headers most recently produced by the
        # wrapped provider (consumed by _get_token).
        self._cached_token: Optional[Token] = None
        self._external_headers: Dict[str, str] = {}

    def add_headers(self, request_headers: Dict[str, str]):
        """Populate request_headers with authentication headers."""
        cached = self._cached_token
        if cached and not cached.is_expired():
            request_headers["Authorization"] = (
                f"{cached.token_type} {cached.access_token}"
            )
            return

        # Ask the wrapped provider for fresh headers before deciding whether
        # federation applies at all.
        fresh_headers: Dict[str, str] = {}
        self._external_headers = fresh_headers
        self.external_provider.add_headers(fresh_headers)

        if "Authorization" in fresh_headers:
            resolved = self._get_token()
            request_headers[
                "Authorization"
            ] = f"{resolved.token_type} {resolved.access_token}"
        else:
            # Nothing to federate: forward whatever the wrapped provider set.
            request_headers.update(fresh_headers)

    def _get_token(self) -> Token:
        """Return the cached token if still valid, otherwise resolve a new one."""
        current = self._cached_token
        if current and not current.is_expired():
            return current

        # Reuse the headers add_headers already fetched from the wrapped provider.
        scheme, raw_token = self._extract_token_from_header(
            self._external_headers.get("Authorization", "")
        )

        resolved: Optional[Token] = None
        if self._should_exchange_token(raw_token):
            try:
                resolved = self._exchange_token(raw_token)
            except Exception as e:
                logger.warning("Token exchange failed, using external token: %s", e)

        if resolved is None:
            # No exchange needed, or the exchange failed: use the external token.
            resolved = Token(raw_token, scheme)

        self._cached_token = resolved
        return resolved

    def _should_exchange_token(self, access_token: str) -> bool:
        """Return True when the token's issuer host differs from the Databricks host."""
        claims = decode_token(access_token)
        if not claims:
            # Undecodable tokens are never exchanged.
            return False
        return not is_same_host(claims.get("iss", ""), self.hostname)

    def _exchange_token(self, access_token: str) -> Token:
        """Trade the external token for a Databricks token via the exchange endpoint."""
        endpoint_url = self.hostname.rstrip("/") + self.TOKEN_EXCHANGE_ENDPOINT

        form_fields = [
            ("grant_type", self.TOKEN_EXCHANGE_GRANT_TYPE),
            ("subject_token", access_token),
            ("subject_token_type", self.TOKEN_EXCHANGE_SUBJECT_TYPE),
            ("scope", "sql"),
            ("return_original_token_if_authenticated", "true"),
        ]
        if self.identity_federation_client_id:
            form_fields.append(("client_id", self.identity_federation_client_id))

        response = self.http_client.request(
            HttpMethod.POST,
            url=endpoint_url,
            body=urlencode(form_fields),
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Accept": "*/*",
            },
        )

        payload = json.loads(response.data.decode())
        return Token(payload["access_token"], payload.get("token_type", "Bearer"))

    def _extract_token_from_header(self, auth_header: str) -> Tuple[str, str]:
        """
        Split an Authorization header value into (token type, access token).

        Raises:
            ValueError: If the header is empty or contains no space separator
        """
        if not auth_header:
            raise ValueError("Authorization header is missing")

        scheme, separator, credential = auth_header.partition(" ")
        if not separator:
            raise ValueError("Invalid Authorization header format")

        return scheme, credential
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/client.py
RENAMED
|
@@ -9,6 +9,7 @@ except ImportError:
|
|
|
9
9
|
import json
|
|
10
10
|
import os
|
|
11
11
|
import decimal
|
|
12
|
+
from urllib.parse import urlparse
|
|
12
13
|
from uuid import UUID
|
|
13
14
|
|
|
14
15
|
from databricks.sql import __version__
|
|
@@ -20,6 +21,8 @@ from databricks.sql.exc import (
|
|
|
20
21
|
InterfaceError,
|
|
21
22
|
NotSupportedError,
|
|
22
23
|
ProgrammingError,
|
|
24
|
+
TransactionError,
|
|
25
|
+
DatabaseError,
|
|
23
26
|
)
|
|
24
27
|
|
|
25
28
|
from databricks.sql.thrift_api.TCLIService import ttypes
|
|
@@ -86,6 +89,9 @@ DEFAULT_ARRAY_SIZE = 100000
|
|
|
86
89
|
|
|
87
90
|
NO_NATIVE_PARAMS: List = []
|
|
88
91
|
|
|
92
|
+
# Transaction isolation level constants (extension to PEP 249)
|
|
93
|
+
TRANSACTION_ISOLATION_LEVEL_REPEATABLE_READ = "REPEATABLE_READ"
|
|
94
|
+
|
|
89
95
|
|
|
90
96
|
class Connection:
|
|
91
97
|
def __init__(
|
|
@@ -200,6 +206,17 @@ class Connection:
|
|
|
200
206
|
STRUCT is returned as Dict[str, Any]
|
|
201
207
|
ARRAY is returned as numpy.ndarray
|
|
202
208
|
When False, complex types are returned as a strings. These are generally deserializable as JSON.
|
|
209
|
+
:param enable_metric_view_metadata: `bool`, optional (default is False)
|
|
210
|
+
When True, enables metric view metadata support by setting the
|
|
211
|
+
spark.sql.thriftserver.metadata.metricview.enabled session configuration.
|
|
212
|
+
This allows
|
|
213
|
+
1. cursor.tables() to return METRIC_VIEW table type
|
|
214
|
+
2. cursor.columns() to return "measure" column type
|
|
215
|
+
:param fetch_autocommit_from_server: `bool`, optional (default is False)
|
|
216
|
+
When True, the connection.autocommit property queries the server for current state
|
|
217
|
+
using SET AUTOCOMMIT instead of returning cached value.
|
|
218
|
+
Set to True if autocommit might be changed by external means (e.g., external SQL commands).
|
|
219
|
+
When False (default), uses cached state for better performance.
|
|
203
220
|
"""
|
|
204
221
|
|
|
205
222
|
# Internal arguments in **kwargs:
|
|
@@ -248,6 +265,14 @@ class Connection:
|
|
|
248
265
|
access_token_kv = {"access_token": access_token}
|
|
249
266
|
kwargs = {**kwargs, **access_token_kv}
|
|
250
267
|
|
|
268
|
+
enable_metric_view_metadata = kwargs.get("enable_metric_view_metadata", False)
|
|
269
|
+
if enable_metric_view_metadata:
|
|
270
|
+
if session_configuration is None:
|
|
271
|
+
session_configuration = {}
|
|
272
|
+
session_configuration[
|
|
273
|
+
"spark.sql.thriftserver.metadata.metricview.enabled"
|
|
274
|
+
] = "true"
|
|
275
|
+
|
|
251
276
|
self.disable_pandas = kwargs.get("_disable_pandas", False)
|
|
252
277
|
self.lz4_compression = kwargs.get("enable_query_result_lz4_compression", True)
|
|
253
278
|
self.use_cloud_fetch = kwargs.get("use_cloud_fetch", True)
|
|
@@ -290,6 +315,9 @@ class Connection:
|
|
|
290
315
|
kwargs.get("use_inline_params", False)
|
|
291
316
|
)
|
|
292
317
|
self.staging_allowed_local_path = kwargs.get("staging_allowed_local_path", None)
|
|
318
|
+
self._fetch_autocommit_from_server = kwargs.get(
|
|
319
|
+
"fetch_autocommit_from_server", False
|
|
320
|
+
)
|
|
293
321
|
|
|
294
322
|
self.force_enable_telemetry = kwargs.get("force_enable_telemetry", False)
|
|
295
323
|
self.enable_telemetry = kwargs.get("enable_telemetry", False)
|
|
@@ -308,6 +336,20 @@ class Connection:
|
|
|
308
336
|
session_id_hex=self.get_session_id_hex()
|
|
309
337
|
)
|
|
310
338
|
|
|
339
|
+
# Determine proxy usage
|
|
340
|
+
use_proxy = self.http_client.using_proxy()
|
|
341
|
+
proxy_host_info = None
|
|
342
|
+
if (
|
|
343
|
+
use_proxy
|
|
344
|
+
and self.http_client.proxy_uri
|
|
345
|
+
and isinstance(self.http_client.proxy_uri, str)
|
|
346
|
+
):
|
|
347
|
+
parsed = urlparse(self.http_client.proxy_uri)
|
|
348
|
+
proxy_host_info = HostDetails(
|
|
349
|
+
host_url=parsed.hostname or self.http_client.proxy_uri,
|
|
350
|
+
port=parsed.port or 8080,
|
|
351
|
+
)
|
|
352
|
+
|
|
311
353
|
driver_connection_params = DriverConnectionParameters(
|
|
312
354
|
http_path=http_path,
|
|
313
355
|
mode=DatabricksClientType.SEA
|
|
@@ -317,13 +359,31 @@ class Connection:
|
|
|
317
359
|
auth_mech=TelemetryHelper.get_auth_mechanism(self.session.auth_provider),
|
|
318
360
|
auth_flow=TelemetryHelper.get_auth_flow(self.session.auth_provider),
|
|
319
361
|
socket_timeout=kwargs.get("_socket_timeout", None),
|
|
362
|
+
azure_workspace_resource_id=kwargs.get("azure_workspace_resource_id", None),
|
|
363
|
+
azure_tenant_id=kwargs.get("azure_tenant_id", None),
|
|
364
|
+
use_proxy=use_proxy,
|
|
365
|
+
use_system_proxy=use_proxy,
|
|
366
|
+
proxy_host_info=proxy_host_info,
|
|
367
|
+
use_cf_proxy=False, # CloudFlare proxy not yet supported in Python
|
|
368
|
+
cf_proxy_host_info=None, # CloudFlare proxy not yet supported in Python
|
|
369
|
+
non_proxy_hosts=None,
|
|
370
|
+
allow_self_signed_support=kwargs.get("_tls_no_verify", False),
|
|
371
|
+
use_system_trust_store=True, # Python uses system SSL by default
|
|
372
|
+
enable_arrow=pyarrow is not None,
|
|
373
|
+
enable_direct_results=True, # Always enabled in Python
|
|
374
|
+
enable_sea_hybrid_results=kwargs.get("use_hybrid_disposition", False),
|
|
375
|
+
http_connection_pool_size=kwargs.get("pool_maxsize", None),
|
|
376
|
+
rows_fetched_per_block=DEFAULT_ARRAY_SIZE,
|
|
377
|
+
async_poll_interval_millis=2000, # Default polling interval
|
|
378
|
+
support_many_parameters=True, # Native parameters supported
|
|
379
|
+
enable_complex_datatype_support=_use_arrow_native_complex_types,
|
|
380
|
+
allowed_volume_ingestion_paths=self.staging_allowed_local_path,
|
|
320
381
|
)
|
|
321
382
|
|
|
322
383
|
self._telemetry_client.export_initial_telemetry_log(
|
|
323
384
|
driver_connection_params=driver_connection_params,
|
|
324
385
|
user_agent=self.session.useragent_header,
|
|
325
386
|
)
|
|
326
|
-
self.staging_allowed_local_path = kwargs.get("staging_allowed_local_path", None)
|
|
327
387
|
|
|
328
388
|
def _set_use_inline_params_with_warning(self, value: Union[bool, str]):
|
|
329
389
|
"""Valid values are True, False, and "silent"
|
|
@@ -459,15 +519,261 @@ class Connection:
|
|
|
459
519
|
if self.http_client:
|
|
460
520
|
self.http_client.close()
|
|
461
521
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
522
|
+
@property
|
|
523
|
+
def autocommit(self) -> bool:
|
|
524
|
+
"""
|
|
525
|
+
Get auto-commit mode for this connection.
|
|
465
526
|
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
527
|
+
Extension to PEP 249. Returns cached value by default.
|
|
528
|
+
If fetch_autocommit_from_server=True was set during connection,
|
|
529
|
+
queries server for current state.
|
|
530
|
+
|
|
531
|
+
Returns:
|
|
532
|
+
bool: True if auto-commit is enabled, False otherwise
|
|
533
|
+
|
|
534
|
+
Raises:
|
|
535
|
+
InterfaceError: If connection is closed
|
|
536
|
+
TransactionError: If fetch_autocommit_from_server=True and query fails
|
|
537
|
+
"""
|
|
538
|
+
if not self.open:
|
|
539
|
+
raise InterfaceError(
|
|
540
|
+
"Cannot get autocommit on closed connection",
|
|
541
|
+
session_id_hex=self.get_session_id_hex(),
|
|
542
|
+
)
|
|
543
|
+
|
|
544
|
+
if self._fetch_autocommit_from_server:
|
|
545
|
+
return self._fetch_autocommit_state_from_server()
|
|
546
|
+
|
|
547
|
+
return self.session.get_autocommit()
|
|
548
|
+
|
|
549
|
+
@autocommit.setter
|
|
550
|
+
def autocommit(self, value: bool) -> None:
|
|
551
|
+
"""
|
|
552
|
+
Set auto-commit mode for this connection.
|
|
553
|
+
|
|
554
|
+
Extension to PEP 249. Executes SET AUTOCOMMIT command on server.
|
|
555
|
+
|
|
556
|
+
Args:
|
|
557
|
+
value: True to enable auto-commit, False to disable
|
|
558
|
+
|
|
559
|
+
Raises:
|
|
560
|
+
InterfaceError: If connection is closed
|
|
561
|
+
TransactionError: If server rejects the change
|
|
562
|
+
"""
|
|
563
|
+
if not self.open:
|
|
564
|
+
raise InterfaceError(
|
|
565
|
+
"Cannot set autocommit on closed connection",
|
|
566
|
+
session_id_hex=self.get_session_id_hex(),
|
|
567
|
+
)
|
|
568
|
+
|
|
569
|
+
# Create internal cursor for transaction control
|
|
570
|
+
cursor = None
|
|
571
|
+
try:
|
|
572
|
+
cursor = self.cursor()
|
|
573
|
+
sql = f"SET AUTOCOMMIT = {'TRUE' if value else 'FALSE'}"
|
|
574
|
+
cursor.execute(sql)
|
|
575
|
+
|
|
576
|
+
# Update cached state on success
|
|
577
|
+
self.session.set_autocommit(value)
|
|
578
|
+
|
|
579
|
+
except DatabaseError as e:
|
|
580
|
+
# Wrap in TransactionError with context
|
|
581
|
+
raise TransactionError(
|
|
582
|
+
f"Failed to set autocommit to {value}: {e.message}",
|
|
583
|
+
context={
|
|
584
|
+
**e.context,
|
|
585
|
+
"operation": "set_autocommit",
|
|
586
|
+
"autocommit_value": value,
|
|
587
|
+
},
|
|
588
|
+
session_id_hex=self.get_session_id_hex(),
|
|
589
|
+
) from e
|
|
590
|
+
finally:
|
|
591
|
+
if cursor:
|
|
592
|
+
cursor.close()
|
|
593
|
+
|
|
594
|
+
def _fetch_autocommit_state_from_server(self) -> bool:
|
|
595
|
+
"""
|
|
596
|
+
Query server for current autocommit state using SET AUTOCOMMIT.
|
|
597
|
+
|
|
598
|
+
Returns:
|
|
599
|
+
bool: Server's autocommit state
|
|
600
|
+
|
|
601
|
+
Raises:
|
|
602
|
+
TransactionError: If query fails
|
|
603
|
+
"""
|
|
604
|
+
cursor = None
|
|
605
|
+
try:
|
|
606
|
+
cursor = self.cursor()
|
|
607
|
+
cursor.execute("SET AUTOCOMMIT")
|
|
608
|
+
|
|
609
|
+
# Fetch result: should return row with value column
|
|
610
|
+
result = cursor.fetchone()
|
|
611
|
+
if result is None:
|
|
612
|
+
raise TransactionError(
|
|
613
|
+
"No result returned from SET AUTOCOMMIT query",
|
|
614
|
+
context={"operation": "fetch_autocommit"},
|
|
615
|
+
session_id_hex=self.get_session_id_hex(),
|
|
616
|
+
)
|
|
617
|
+
|
|
618
|
+
# Parse value (first column should be "true" or "false")
|
|
619
|
+
value_str = str(result[0]).lower()
|
|
620
|
+
autocommit_state = value_str == "true"
|
|
621
|
+
|
|
622
|
+
# Update cache
|
|
623
|
+
self.session.set_autocommit(autocommit_state)
|
|
624
|
+
|
|
625
|
+
return autocommit_state
|
|
626
|
+
|
|
627
|
+
except TransactionError:
|
|
628
|
+
# Re-raise TransactionError as-is
|
|
629
|
+
raise
|
|
630
|
+
except DatabaseError as e:
|
|
631
|
+
# Wrap other DatabaseErrors
|
|
632
|
+
raise TransactionError(
|
|
633
|
+
f"Failed to fetch autocommit state from server: {e.message}",
|
|
634
|
+
context={**e.context, "operation": "fetch_autocommit"},
|
|
635
|
+
session_id_hex=self.get_session_id_hex(),
|
|
636
|
+
) from e
|
|
637
|
+
finally:
|
|
638
|
+
if cursor:
|
|
639
|
+
cursor.close()
|
|
640
|
+
|
|
641
|
+
def commit(self) -> None:
|
|
642
|
+
"""
|
|
643
|
+
Commit the current transaction.
|
|
644
|
+
|
|
645
|
+
Per PEP 249. Should be called only when autocommit is disabled.
|
|
646
|
+
|
|
647
|
+
When autocommit is False:
|
|
648
|
+
- Commits the current transaction
|
|
649
|
+
- Server automatically starts new transaction
|
|
650
|
+
|
|
651
|
+
When autocommit is True:
|
|
652
|
+
- Server may throw error if no active transaction
|
|
653
|
+
|
|
654
|
+
Raises:
|
|
655
|
+
InterfaceError: If connection is closed
|
|
656
|
+
TransactionError: If commit fails (e.g., no active transaction)
|
|
657
|
+
"""
|
|
658
|
+
if not self.open:
|
|
659
|
+
raise InterfaceError(
|
|
660
|
+
"Cannot commit on closed connection",
|
|
661
|
+
session_id_hex=self.get_session_id_hex(),
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
cursor = None
|
|
665
|
+
try:
|
|
666
|
+
cursor = self.cursor()
|
|
667
|
+
cursor.execute("COMMIT")
|
|
668
|
+
|
|
669
|
+
except DatabaseError as e:
|
|
670
|
+
raise TransactionError(
|
|
671
|
+
f"Failed to commit transaction: {e.message}",
|
|
672
|
+
context={**e.context, "operation": "commit"},
|
|
673
|
+
session_id_hex=self.get_session_id_hex(),
|
|
674
|
+
) from e
|
|
675
|
+
finally:
|
|
676
|
+
if cursor:
|
|
677
|
+
cursor.close()
|
|
678
|
+
|
|
679
|
+
def rollback(self) -> None:
|
|
680
|
+
"""
|
|
681
|
+
Rollback the current transaction.
|
|
682
|
+
|
|
683
|
+
Per PEP 249. Should be called only when autocommit is disabled.
|
|
684
|
+
|
|
685
|
+
When autocommit is False:
|
|
686
|
+
- Rolls back the current transaction
|
|
687
|
+
- Server automatically starts new transaction
|
|
688
|
+
|
|
689
|
+
When autocommit is True:
|
|
690
|
+
- ROLLBACK is forgiving (no-op, doesn't throw exception)
|
|
691
|
+
|
|
692
|
+
Note: ROLLBACK is safe to call even without active transaction.
|
|
693
|
+
|
|
694
|
+
Raises:
|
|
695
|
+
InterfaceError: If connection is closed
|
|
696
|
+
TransactionError: If rollback fails
|
|
697
|
+
"""
|
|
698
|
+
if not self.open:
|
|
699
|
+
raise InterfaceError(
|
|
700
|
+
"Cannot rollback on closed connection",
|
|
701
|
+
session_id_hex=self.get_session_id_hex(),
|
|
702
|
+
)
|
|
703
|
+
|
|
704
|
+
cursor = None
|
|
705
|
+
try:
|
|
706
|
+
cursor = self.cursor()
|
|
707
|
+
cursor.execute("ROLLBACK")
|
|
708
|
+
|
|
709
|
+
except DatabaseError as e:
|
|
710
|
+
raise TransactionError(
|
|
711
|
+
f"Failed to rollback transaction: {e.message}",
|
|
712
|
+
context={**e.context, "operation": "rollback"},
|
|
713
|
+
session_id_hex=self.get_session_id_hex(),
|
|
714
|
+
) from e
|
|
715
|
+
finally:
|
|
716
|
+
if cursor:
|
|
717
|
+
cursor.close()
|
|
718
|
+
|
|
719
|
+
def get_transaction_isolation(self) -> str:
|
|
720
|
+
"""
|
|
721
|
+
Get the transaction isolation level.
|
|
722
|
+
|
|
723
|
+
Extension to PEP 249.
|
|
724
|
+
|
|
725
|
+
Databricks supports REPEATABLE_READ isolation level (Snapshot Isolation),
|
|
726
|
+
which is the default and only supported level.
|
|
727
|
+
|
|
728
|
+
Returns:
|
|
729
|
+
str: "REPEATABLE_READ" - the transaction isolation level constant
|
|
730
|
+
|
|
731
|
+
Raises:
|
|
732
|
+
InterfaceError: If connection is closed
|
|
733
|
+
"""
|
|
734
|
+
if not self.open:
|
|
735
|
+
raise InterfaceError(
|
|
736
|
+
"Cannot get transaction isolation on closed connection",
|
|
737
|
+
session_id_hex=self.get_session_id_hex(),
|
|
738
|
+
)
|
|
739
|
+
|
|
740
|
+
return TRANSACTION_ISOLATION_LEVEL_REPEATABLE_READ
|
|
741
|
+
|
|
742
|
+
def set_transaction_isolation(self, level: str) -> None:
|
|
743
|
+
"""
|
|
744
|
+
Set transaction isolation level.
|
|
745
|
+
|
|
746
|
+
Extension to PEP 249.
|
|
747
|
+
|
|
748
|
+
Databricks supports only REPEATABLE_READ isolation level (Snapshot Isolation).
|
|
749
|
+
This method validates that the requested level is supported but does not
|
|
750
|
+
execute any SQL, as REPEATABLE_READ is the default server behavior.
|
|
751
|
+
|
|
752
|
+
Args:
|
|
753
|
+
level: Isolation level. Must be "REPEATABLE_READ" or "REPEATABLE READ"
|
|
754
|
+
(case-insensitive, underscores and spaces are interchangeable)
|
|
755
|
+
|
|
756
|
+
Raises:
|
|
757
|
+
InterfaceError: If connection is closed
|
|
758
|
+
NotSupportedError: If isolation level not supported
|
|
759
|
+
"""
|
|
760
|
+
if not self.open:
|
|
761
|
+
raise InterfaceError(
|
|
762
|
+
"Cannot set transaction isolation on closed connection",
|
|
763
|
+
session_id_hex=self.get_session_id_hex(),
|
|
764
|
+
)
|
|
765
|
+
|
|
766
|
+
# Normalize and validate isolation level
|
|
767
|
+
normalized_level = level.upper().replace("_", " ")
|
|
768
|
+
|
|
769
|
+
if normalized_level != TRANSACTION_ISOLATION_LEVEL_REPEATABLE_READ.replace(
|
|
770
|
+
"_", " "
|
|
771
|
+
):
|
|
772
|
+
raise NotSupportedError(
|
|
773
|
+
f"Setting transaction isolation level '{level}' is not supported. "
|
|
774
|
+
f"Only {TRANSACTION_ISOLATION_LEVEL_REPEATABLE_READ} is supported.",
|
|
775
|
+
session_id_hex=self.get_session_id_hex(),
|
|
776
|
+
)
|
|
471
777
|
|
|
472
778
|
|
|
473
779
|
class Cursor:
|
|
@@ -301,6 +301,11 @@ class UnifiedHttpClient:
|
|
|
301
301
|
"""Check if proxy support is available (not whether it's being used for a specific request)."""
|
|
302
302
|
return self._proxy_pool_manager is not None
|
|
303
303
|
|
|
304
|
+
@property
|
|
305
|
+
def proxy_uri(self) -> Optional[str]:
|
|
306
|
+
"""Get the configured proxy URI, if any."""
|
|
307
|
+
return self._proxy_uri
|
|
308
|
+
|
|
304
309
|
def close(self):
|
|
305
310
|
"""Close the underlying connection pools."""
|
|
306
311
|
if self._direct_pool_manager:
|
|
@@ -70,6 +70,23 @@ class NotSupportedError(DatabaseError):
|
|
|
70
70
|
pass
|
|
71
71
|
|
|
72
72
|
|
|
73
|
+
class TransactionError(DatabaseError):
|
|
74
|
+
"""
|
|
75
|
+
Exception raised for transaction-specific errors.
|
|
76
|
+
|
|
77
|
+
This exception is used when transaction control operations fail, such as:
|
|
78
|
+
- Setting autocommit mode (AUTOCOMMIT_SET_DURING_ACTIVE_TRANSACTION)
|
|
79
|
+
- Committing a transaction (MULTI_STATEMENT_TRANSACTION_NO_ACTIVE_TRANSACTION)
|
|
80
|
+
- Rolling back a transaction
|
|
81
|
+
- Setting transaction isolation level
|
|
82
|
+
|
|
83
|
+
The exception includes context about which transaction operation failed
|
|
84
|
+
and preserves the underlying cause via exception chaining.
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
pass
|
|
88
|
+
|
|
89
|
+
|
|
73
90
|
### Custom error classes ###
|
|
74
91
|
class InvalidServerResponseError(OperationalError):
|
|
75
92
|
"""Thrown if the server does not set the initial namespace correctly"""
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/session.py
RENAMED
|
@@ -45,6 +45,9 @@ class Session:
|
|
|
45
45
|
self.schema = schema
|
|
46
46
|
self.http_path = http_path
|
|
47
47
|
|
|
48
|
+
# Initialize autocommit state (JDBC default is True)
|
|
49
|
+
self._autocommit = True
|
|
50
|
+
|
|
48
51
|
user_agent_entry = kwargs.get("user_agent_entry")
|
|
49
52
|
if user_agent_entry is None:
|
|
50
53
|
user_agent_entry = kwargs.get("_user_agent_entry")
|
|
@@ -168,6 +171,24 @@ class Session:
|
|
|
168
171
|
"""Get the session ID in hex format"""
|
|
169
172
|
return self._session_id.hex_guid
|
|
170
173
|
|
|
174
|
+
def get_autocommit(self) -> bool:
|
|
175
|
+
"""
|
|
176
|
+
Get the cached autocommit state for this session.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
bool: True if autocommit is enabled, False otherwise
|
|
180
|
+
"""
|
|
181
|
+
return self._autocommit
|
|
182
|
+
|
|
183
|
+
def set_autocommit(self, value: bool) -> None:
|
|
184
|
+
"""
|
|
185
|
+
Update the cached autocommit state for this session.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
value: True to cache autocommit as enabled, False as disabled
|
|
189
|
+
"""
|
|
190
|
+
self._autocommit = value
|
|
191
|
+
|
|
171
192
|
def close(self) -> None:
|
|
172
193
|
"""Close the underlying session."""
|
|
173
194
|
logger.info("Closing session %s", self.guid_hex)
|
|
@@ -38,6 +38,25 @@ class DriverConnectionParameters(JsonSerializableMixin):
|
|
|
38
38
|
auth_mech (AuthMech): The authentication mechanism used
|
|
39
39
|
auth_flow (AuthFlow): The authentication flow type
|
|
40
40
|
socket_timeout (int): Connection timeout in milliseconds
|
|
41
|
+
azure_workspace_resource_id (str): Azure workspace resource ID
|
|
42
|
+
azure_tenant_id (str): Azure tenant ID
|
|
43
|
+
use_proxy (bool): Whether proxy is being used
|
|
44
|
+
use_system_proxy (bool): Whether system proxy is being used
|
|
45
|
+
proxy_host_info (HostDetails): Proxy host details if configured
|
|
46
|
+
use_cf_proxy (bool): Whether CloudFlare proxy is being used
|
|
47
|
+
cf_proxy_host_info (HostDetails): CloudFlare proxy host details if configured
|
|
48
|
+
non_proxy_hosts (list): List of hosts that bypass proxy
|
|
49
|
+
allow_self_signed_support (bool): Whether self-signed certificates are allowed
|
|
50
|
+
use_system_trust_store (bool): Whether system trust store is used
|
|
51
|
+
enable_arrow (bool): Whether Arrow format is enabled
|
|
52
|
+
enable_direct_results (bool): Whether direct results are enabled
|
|
53
|
+
enable_sea_hybrid_results (bool): Whether SEA hybrid results are enabled
|
|
54
|
+
http_connection_pool_size (int): HTTP connection pool size
|
|
55
|
+
rows_fetched_per_block (int): Number of rows fetched per block
|
|
56
|
+
async_poll_interval_millis (int): Async polling interval in milliseconds
|
|
57
|
+
support_many_parameters (bool): Whether many parameters are supported
|
|
58
|
+
enable_complex_datatype_support (bool): Whether complex datatypes are supported
|
|
59
|
+
allowed_volume_ingestion_paths (str): Allowed paths for volume ingestion
|
|
41
60
|
"""
|
|
42
61
|
|
|
43
62
|
http_path: str
|
|
@@ -46,6 +65,25 @@ class DriverConnectionParameters(JsonSerializableMixin):
|
|
|
46
65
|
auth_mech: Optional[AuthMech] = None
|
|
47
66
|
auth_flow: Optional[AuthFlow] = None
|
|
48
67
|
socket_timeout: Optional[int] = None
|
|
68
|
+
azure_workspace_resource_id: Optional[str] = None
|
|
69
|
+
azure_tenant_id: Optional[str] = None
|
|
70
|
+
use_proxy: Optional[bool] = None
|
|
71
|
+
use_system_proxy: Optional[bool] = None
|
|
72
|
+
proxy_host_info: Optional[HostDetails] = None
|
|
73
|
+
use_cf_proxy: Optional[bool] = None
|
|
74
|
+
cf_proxy_host_info: Optional[HostDetails] = None
|
|
75
|
+
non_proxy_hosts: Optional[list] = None
|
|
76
|
+
allow_self_signed_support: Optional[bool] = None
|
|
77
|
+
use_system_trust_store: Optional[bool] = None
|
|
78
|
+
enable_arrow: Optional[bool] = None
|
|
79
|
+
enable_direct_results: Optional[bool] = None
|
|
80
|
+
enable_sea_hybrid_results: Optional[bool] = None
|
|
81
|
+
http_connection_pool_size: Optional[int] = None
|
|
82
|
+
rows_fetched_per_block: Optional[int] = None
|
|
83
|
+
async_poll_interval_millis: Optional[int] = None
|
|
84
|
+
support_many_parameters: Optional[bool] = None
|
|
85
|
+
enable_complex_datatype_support: Optional[bool] = None
|
|
86
|
+
allowed_volume_ingestion_paths: Optional[str] = None
|
|
49
87
|
|
|
50
88
|
|
|
51
89
|
@dataclass
|
|
@@ -111,6 +149,69 @@ class DriverErrorInfo(JsonSerializableMixin):
|
|
|
111
149
|
stack_trace: str
|
|
112
150
|
|
|
113
151
|
|
|
152
|
+
@dataclass
|
|
153
|
+
class ChunkDetails(JsonSerializableMixin):
|
|
154
|
+
"""
|
|
155
|
+
Contains detailed metrics about chunk downloads during result fetching.
|
|
156
|
+
|
|
157
|
+
These metrics are accumulated across all chunk downloads for a single statement.
|
|
158
|
+
|
|
159
|
+
Attributes:
|
|
160
|
+
initial_chunk_latency_millis (int): Latency of the first chunk download
|
|
161
|
+
slowest_chunk_latency_millis (int): Latency of the slowest chunk download
|
|
162
|
+
total_chunks_present (int): Total number of chunks available
|
|
163
|
+
total_chunks_iterated (int): Number of chunks actually downloaded
|
|
164
|
+
sum_chunks_download_time_millis (int): Total time spent downloading all chunks
|
|
165
|
+
"""
|
|
166
|
+
|
|
167
|
+
initial_chunk_latency_millis: Optional[int] = None
|
|
168
|
+
slowest_chunk_latency_millis: Optional[int] = None
|
|
169
|
+
total_chunks_present: Optional[int] = None
|
|
170
|
+
total_chunks_iterated: Optional[int] = None
|
|
171
|
+
sum_chunks_download_time_millis: Optional[int] = None
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@dataclass
|
|
175
|
+
class ResultLatency(JsonSerializableMixin):
|
|
176
|
+
"""
|
|
177
|
+
Contains latency metrics for different phases of query execution.
|
|
178
|
+
|
|
179
|
+
This tracks two distinct phases:
|
|
180
|
+
1. result_set_ready_latency_millis: Time from query submission until results are available (execute phase)
|
|
181
|
+
- Set when execute() completes
|
|
182
|
+
2. result_set_consumption_latency_millis: Time spent iterating/fetching results (fetch phase)
|
|
183
|
+
- Measured from first fetch call until no more rows available
|
|
184
|
+
- In Java: tracked via markResultSetConsumption(hasNext) method
|
|
185
|
+
- Records start time on first fetch, calculates total on last fetch
|
|
186
|
+
|
|
187
|
+
Attributes:
|
|
188
|
+
result_set_ready_latency_millis (int): Time until query results are ready (execution phase)
|
|
189
|
+
result_set_consumption_latency_millis (int): Time spent fetching/consuming results (fetch phase)
|
|
190
|
+
|
|
191
|
+
"""
|
|
192
|
+
|
|
193
|
+
result_set_ready_latency_millis: Optional[int] = None
|
|
194
|
+
result_set_consumption_latency_millis: Optional[int] = None
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@dataclass
|
|
198
|
+
class OperationDetail(JsonSerializableMixin):
|
|
199
|
+
"""
|
|
200
|
+
Contains detailed information about the operation being performed.
|
|
201
|
+
|
|
202
|
+
Attributes:
|
|
203
|
+
n_operation_status_calls (int): Number of status polling calls made
|
|
204
|
+
operation_status_latency_millis (int): Total latency of all status calls
|
|
205
|
+
operation_type (str): Specific operation type (e.g., EXECUTE_STATEMENT, LIST_TABLES, CANCEL_STATEMENT)
|
|
206
|
+
is_internal_call (bool): Whether this is an internal driver operation
|
|
207
|
+
"""
|
|
208
|
+
|
|
209
|
+
n_operation_status_calls: Optional[int] = None
|
|
210
|
+
operation_status_latency_millis: Optional[int] = None
|
|
211
|
+
operation_type: Optional[str] = None
|
|
212
|
+
is_internal_call: Optional[bool] = None
|
|
213
|
+
|
|
214
|
+
|
|
114
215
|
@dataclass
|
|
115
216
|
class SqlExecutionEvent(JsonSerializableMixin):
|
|
116
217
|
"""
|
|
@@ -122,7 +223,10 @@ class SqlExecutionEvent(JsonSerializableMixin):
|
|
|
122
223
|
is_compressed (bool): Whether the result is compressed
|
|
123
224
|
execution_result (ExecutionResultFormat): Format of the execution result
|
|
124
225
|
retry_count (int): Number of retry attempts made
|
|
125
|
-
chunk_id (int): ID of the chunk if applicable
|
|
226
|
+
chunk_id (int): ID of the chunk if applicable (used for error tracking)
|
|
227
|
+
chunk_details (ChunkDetails): Aggregated chunk download metrics
|
|
228
|
+
result_latency (ResultLatency): Latency breakdown by execution phase
|
|
229
|
+
operation_detail (OperationDetail): Detailed operation information
|
|
126
230
|
"""
|
|
127
231
|
|
|
128
232
|
statement_type: StatementType
|
|
@@ -130,6 +234,9 @@ class SqlExecutionEvent(JsonSerializableMixin):
|
|
|
130
234
|
execution_result: ExecutionResultFormat
|
|
131
235
|
retry_count: Optional[int]
|
|
132
236
|
chunk_id: Optional[int]
|
|
237
|
+
chunk_details: Optional[ChunkDetails] = None
|
|
238
|
+
result_latency: Optional[ResultLatency] = None
|
|
239
|
+
operation_detail: Optional[OperationDetail] = None
|
|
133
240
|
|
|
134
241
|
|
|
135
242
|
@dataclass
|
|
File without changes
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/oauth.py
RENAMED
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/auth/retry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/common/http.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/py.typed
RENAMED
|
File without changes
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/result_set.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/types.py
RENAMED
|
File without changes
|
{databricks_sql_connector-4.1.3 → databricks_sql_connector-4.2.0}/src/databricks/sql/utils.py
RENAMED
|
File without changes
|