databricks-sql-connector 3.3.0__tar.gz → 3.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/CHANGELOG.md +11 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/PKG-INFO +8 -9
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/README.md +5 -7
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/pyproject.toml +2 -2
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/__init__.py +1 -1
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/auth.py +14 -1
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/thrift_http_client.py +25 -16
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/client.py +110 -12
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/cloudfetch/download_manager.py +5 -4
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/cloudfetch/downloader.py +5 -7
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_backend.py +22 -45
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/types.py +48 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/utils.py +168 -26
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/LICENSE +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/__init__.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/__init__.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/authenticators.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/endpoint.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/retry.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/exc.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/experimental/__init__.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/__init__.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/native.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/py.typed +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/py.typed +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/__init__.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/README.sqlalchemy.md +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/README.tests.md +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/__init__.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_ddl.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_parse.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_types.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/base.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/py.typed +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/requirements.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/setup.cfg +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_extra.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_future.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_regression.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_unsupported.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/conftest.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/overrides/_ctetest.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/test_suite.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/__init__.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/conftest.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_ddl.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_parsing.py +0 -0
- {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_types.py +0 -0
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# Release History
|
|
2
2
|
|
|
3
|
+
# 3.5.0 (2024-10-18)
|
|
4
|
+
|
|
5
|
+
- Create a non pyarrow flow to handle small results for the column set (databricks/databricks-sql-python#440 by @jprakash-db)
|
|
6
|
+
- Fix: On non-retryable error, ensure PySQL includes useful information in error (databricks/databricks-sql-python#447 by @shivam2680)
|
|
7
|
+
|
|
8
|
+
# 3.4.0 (2024-08-27)
|
|
9
|
+
|
|
10
|
+
- Unpin pandas to support v2.2.2 (databricks/databricks-sql-python#416 by @kfollesdal)
|
|
11
|
+
- Make OAuth as the default authenticator if no authentication setting is provided (databricks/databricks-sql-python#419 by @jackyhu-db)
|
|
12
|
+
- Fix (regression): use SSL options with HTTPS connection pool (databricks/databricks-sql-python#425 by @kravets-levko)
|
|
13
|
+
|
|
3
14
|
# 3.3.0 (2024-07-18)
|
|
4
15
|
|
|
5
16
|
- Don't retry requests that fail with HTTP code 401 (databricks/databricks-sql-python#408 by @Hodnebo)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: databricks-sql-connector
|
|
3
|
-
Version: 3.3.0
|
|
3
|
+
Version: 3.5.0
|
|
4
4
|
Summary: Databricks SQL Connector for Python
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Databricks
|
|
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
17
|
Provides-Extra: alembic
|
|
17
18
|
Provides-Extra: sqlalchemy
|
|
18
19
|
Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
|
|
@@ -21,7 +22,7 @@ Requires-Dist: numpy (>=1.16.6,<2.0.0) ; python_version >= "3.8" and python_vers
|
|
|
21
22
|
Requires-Dist: numpy (>=1.23.4,<2.0.0) ; python_version >= "3.11"
|
|
22
23
|
Requires-Dist: oauthlib (>=3.1.0,<4.0.0)
|
|
23
24
|
Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
|
|
24
|
-
Requires-Dist: pandas (>=1.2.5,<2.2.0) ; python_version >= "3.8"
|
|
25
|
+
Requires-Dist: pandas (>=1.2.5,<2.3.0) ; python_version >= "3.8"
|
|
25
26
|
Requires-Dist: pyarrow (>=14.0.1,<17)
|
|
26
27
|
Requires-Dist: requests (>=2.18.1,<3.0.0)
|
|
27
28
|
Requires-Dist: sqlalchemy (>=2.0.21) ; extra == "sqlalchemy" or extra == "alembic"
|
|
@@ -57,12 +58,9 @@ For the latest documentation, see
|
|
|
57
58
|
|
|
58
59
|
Install the library with `pip install databricks-sql-connector`
|
|
59
60
|
|
|
60
|
-
Note: Don't hard-code authentication secrets into your Python. Use environment variables
|
|
61
|
-
|
|
62
61
|
```bash
|
|
63
62
|
export DATABRICKS_HOST=********.databricks.com
|
|
64
63
|
export DATABRICKS_HTTP_PATH=/sql/1.0/endpoints/****************
|
|
65
|
-
export DATABRICKS_TOKEN=dapi********************************
|
|
66
64
|
```
|
|
67
65
|
|
|
68
66
|
Example usage:
|
|
@@ -72,12 +70,10 @@ from databricks import sql
|
|
|
72
70
|
|
|
73
71
|
host = os.getenv("DATABRICKS_HOST")
|
|
74
72
|
http_path = os.getenv("DATABRICKS_HTTP_PATH")
|
|
75
|
-
access_token = os.getenv("DATABRICKS_TOKEN")
|
|
76
73
|
|
|
77
74
|
connection = sql.connect(
|
|
78
75
|
server_hostname=host,
|
|
79
|
-
http_path=http_path
|
|
80
|
-
access_token=access_token)
|
|
76
|
+
http_path=http_path)
|
|
81
77
|
|
|
82
78
|
cursor = connection.cursor()
|
|
83
79
|
cursor.execute('SELECT :param `p`, * FROM RANGE(10)', {"param": "foo"})
|
|
@@ -93,7 +89,10 @@ In the above example:
|
|
|
93
89
|
- `server-hostname` is the Databricks instance host name.
|
|
94
90
|
- `http-path` is the HTTP Path either to a Databricks SQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef),
|
|
95
91
|
or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
|
|
96
|
-
|
|
92
|
+
|
|
93
|
+
> Note: This example uses [Databricks OAuth U2M](https://docs.databricks.com/en/dev-tools/auth/oauth-u2m.html)
|
|
94
|
+
> to authenticate the target Databricks user account and needs to open the browser for authentication. So it
|
|
95
|
+
> can only run on the user's machine.
|
|
97
96
|
|
|
98
97
|
|
|
99
98
|
## Contributing
|
|
@@ -24,12 +24,9 @@ For the latest documentation, see
|
|
|
24
24
|
|
|
25
25
|
Install the library with `pip install databricks-sql-connector`
|
|
26
26
|
|
|
27
|
-
Note: Don't hard-code authentication secrets into your Python. Use environment variables
|
|
28
|
-
|
|
29
27
|
```bash
|
|
30
28
|
export DATABRICKS_HOST=********.databricks.com
|
|
31
29
|
export DATABRICKS_HTTP_PATH=/sql/1.0/endpoints/****************
|
|
32
|
-
export DATABRICKS_TOKEN=dapi********************************
|
|
33
30
|
```
|
|
34
31
|
|
|
35
32
|
Example usage:
|
|
@@ -39,12 +36,10 @@ from databricks import sql
|
|
|
39
36
|
|
|
40
37
|
host = os.getenv("DATABRICKS_HOST")
|
|
41
38
|
http_path = os.getenv("DATABRICKS_HTTP_PATH")
|
|
42
|
-
access_token = os.getenv("DATABRICKS_TOKEN")
|
|
43
39
|
|
|
44
40
|
connection = sql.connect(
|
|
45
41
|
server_hostname=host,
|
|
46
|
-
http_path=http_path
|
|
47
|
-
access_token=access_token)
|
|
42
|
+
http_path=http_path)
|
|
48
43
|
|
|
49
44
|
cursor = connection.cursor()
|
|
50
45
|
cursor.execute('SELECT :param `p`, * FROM RANGE(10)', {"param": "foo"})
|
|
@@ -60,7 +55,10 @@ In the above example:
|
|
|
60
55
|
- `server-hostname` is the Databricks instance host name.
|
|
61
56
|
- `http-path` is the HTTP Path either to a Databricks SQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef),
|
|
62
57
|
or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
|
|
63
|
-
|
|
58
|
+
|
|
59
|
+
> Note: This example uses [Databricks OAuth U2M](https://docs.databricks.com/en/dev-tools/auth/oauth-u2m.html)
|
|
60
|
+
> to authenticate the target Databricks user account and needs to open the browser for authentication. So it
|
|
61
|
+
> can only run on the user's machine.
|
|
64
62
|
|
|
65
63
|
|
|
66
64
|
## Contributing
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "databricks-sql-connector"
|
|
3
|
-
version = "3.3.0"
|
|
3
|
+
version = "3.5.0"
|
|
4
4
|
description = "Databricks SQL Connector for Python"
|
|
5
5
|
authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -12,7 +12,7 @@ include = ["CHANGELOG.md"]
|
|
|
12
12
|
python = "^3.8.0"
|
|
13
13
|
thrift = ">=0.16.0,<0.21.0"
|
|
14
14
|
pandas = [
|
|
15
|
-
{ version = ">=1.2.5,<2.2.0", python = ">=3.8" }
|
|
15
|
+
{ version = ">=1.2.5,<2.3.0", python = ">=3.8" }
|
|
16
16
|
]
|
|
17
17
|
pyarrow = ">=14.0.1,<17"
|
|
18
18
|
|
{databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/auth.py
RENAMED
|
@@ -64,7 +64,20 @@ def get_auth_provider(cfg: ClientContext):
|
|
|
64
64
|
# no op authenticator. authentication is performed using ssl certificate outside of headers
|
|
65
65
|
return AuthProvider()
|
|
66
66
|
else:
|
|
67
|
-
|
|
67
|
+
if (
|
|
68
|
+
cfg.oauth_redirect_port_range is not None
|
|
69
|
+
and cfg.oauth_client_id is not None
|
|
70
|
+
and cfg.oauth_scopes is not None
|
|
71
|
+
):
|
|
72
|
+
return DatabricksOAuthProvider(
|
|
73
|
+
cfg.hostname,
|
|
74
|
+
cfg.oauth_persistence,
|
|
75
|
+
cfg.oauth_redirect_port_range,
|
|
76
|
+
cfg.oauth_client_id,
|
|
77
|
+
cfg.oauth_scopes,
|
|
78
|
+
)
|
|
79
|
+
else:
|
|
80
|
+
raise RuntimeError("No valid authentication settings!")
|
|
68
81
|
|
|
69
82
|
|
|
70
83
|
PYSQL_OAUTH_SCOPES = ["sql", "offline_access"]
|
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import logging
|
|
3
3
|
import urllib.parse
|
|
4
|
-
from typing import Dict, Union
|
|
4
|
+
from typing import Dict, Union, Optional
|
|
5
5
|
|
|
6
6
|
import six
|
|
7
7
|
import thrift
|
|
8
8
|
|
|
9
|
-
logger = logging.getLogger(__name__)
|
|
10
|
-
|
|
11
9
|
import ssl
|
|
12
10
|
import warnings
|
|
13
11
|
from http.client import HTTPResponse
|
|
@@ -16,6 +14,9 @@ from io import BytesIO
|
|
|
16
14
|
from urllib3 import HTTPConnectionPool, HTTPSConnectionPool, ProxyManager
|
|
17
15
|
from urllib3.util import make_headers
|
|
18
16
|
from databricks.sql.auth.retry import CommandType, DatabricksRetryPolicy
|
|
17
|
+
from databricks.sql.types import SSLOptions
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class THttpClient(thrift.transport.THttpClient.THttpClient):
|
|
@@ -25,13 +26,12 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
|
|
|
25
26
|
uri_or_host,
|
|
26
27
|
port=None,
|
|
27
28
|
path=None,
|
|
28
|
-
|
|
29
|
-
cert_file=None,
|
|
30
|
-
key_file=None,
|
|
31
|
-
ssl_context=None,
|
|
29
|
+
ssl_options: Optional[SSLOptions] = None,
|
|
32
30
|
max_connections: int = 1,
|
|
33
31
|
retry_policy: Union[DatabricksRetryPolicy, int] = 0,
|
|
34
32
|
):
|
|
33
|
+
self._ssl_options = ssl_options
|
|
34
|
+
|
|
35
35
|
if port is not None:
|
|
36
36
|
warnings.warn(
|
|
37
37
|
"Please use the THttpClient('http{s}://host:port/path') constructor",
|
|
@@ -48,13 +48,11 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
|
|
|
48
48
|
self.scheme = parsed.scheme
|
|
49
49
|
assert self.scheme in ("http", "https")
|
|
50
50
|
if self.scheme == "https":
|
|
51
|
-
self.
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
else ssl_context
|
|
57
|
-
)
|
|
51
|
+
if self._ssl_options is not None:
|
|
52
|
+
# TODO: Not sure if those options are used anywhere - need to double-check
|
|
53
|
+
self.certfile = self._ssl_options.tls_client_cert_file
|
|
54
|
+
self.keyfile = self._ssl_options.tls_client_cert_key_file
|
|
55
|
+
self.context = self._ssl_options.create_ssl_context()
|
|
58
56
|
self.port = parsed.port
|
|
59
57
|
self.host = parsed.hostname
|
|
60
58
|
self.path = parsed.path
|
|
@@ -109,12 +107,23 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
|
|
|
109
107
|
def open(self):
|
|
110
108
|
|
|
111
109
|
# self.__pool replaces the self.__http used by the original THttpClient
|
|
110
|
+
_pool_kwargs = {"maxsize": self.max_connections}
|
|
111
|
+
|
|
112
112
|
if self.scheme == "http":
|
|
113
113
|
pool_class = HTTPConnectionPool
|
|
114
114
|
elif self.scheme == "https":
|
|
115
115
|
pool_class = HTTPSConnectionPool
|
|
116
|
-
|
|
117
|
-
|
|
116
|
+
_pool_kwargs.update(
|
|
117
|
+
{
|
|
118
|
+
"cert_reqs": ssl.CERT_REQUIRED
|
|
119
|
+
if self._ssl_options.tls_verify
|
|
120
|
+
else ssl.CERT_NONE,
|
|
121
|
+
"ca_certs": self._ssl_options.tls_trusted_ca_file,
|
|
122
|
+
"cert_file": self._ssl_options.tls_client_cert_file,
|
|
123
|
+
"key_file": self._ssl_options.tls_client_cert_key_file,
|
|
124
|
+
"key_password": self._ssl_options.tls_client_cert_key_password,
|
|
125
|
+
}
|
|
126
|
+
)
|
|
118
127
|
|
|
119
128
|
if self.using_proxy():
|
|
120
129
|
proxy_manager = ProxyManager(
|
{databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/client.py
RENAMED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
from typing import Dict, Tuple, List, Optional, Any, Union, Sequence
|
|
2
2
|
|
|
3
3
|
import pandas
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import pyarrow
|
|
7
|
+
except ImportError:
|
|
8
|
+
pyarrow = None
|
|
5
9
|
import requests
|
|
6
10
|
import json
|
|
7
11
|
import os
|
|
@@ -22,6 +26,8 @@ from databricks.sql.utils import (
|
|
|
22
26
|
ParamEscaper,
|
|
23
27
|
inject_parameters,
|
|
24
28
|
transform_paramstyle,
|
|
29
|
+
ColumnTable,
|
|
30
|
+
ColumnQueue,
|
|
25
31
|
)
|
|
26
32
|
from databricks.sql.parameters.native import (
|
|
27
33
|
DbsqlParameterBase,
|
|
@@ -35,7 +41,7 @@ from databricks.sql.parameters.native import (
|
|
|
35
41
|
)
|
|
36
42
|
|
|
37
43
|
|
|
38
|
-
from databricks.sql.types import Row
|
|
44
|
+
from databricks.sql.types import Row, SSLOptions
|
|
39
45
|
from databricks.sql.auth.auth import get_python_sql_connector_auth_provider
|
|
40
46
|
from databricks.sql.experimental.oauth_persistence import OAuthPersistence
|
|
41
47
|
|
|
@@ -96,7 +102,7 @@ class Connection:
|
|
|
96
102
|
sanitise parameterized inputs to prevent SQL injection. The inline parameter approach is maintained for
|
|
97
103
|
legacy purposes and will be deprecated in a future release. When this parameter is `True` you will see
|
|
98
104
|
a warning log message. To suppress this log message, set `use_inline_params="silent"`.
|
|
99
|
-
auth_type: `str`, optional
|
|
105
|
+
auth_type: `str`, optional (default is databricks-oauth if neither `access_token` nor `tls_client_cert_file` is set)
|
|
100
106
|
`databricks-oauth` : to use Databricks OAuth with fine-grained permission scopes, set to `databricks-oauth`.
|
|
101
107
|
`azure-oauth` : to use Microsoft Entra ID OAuth flow, set to `azure-oauth`.
|
|
102
108
|
|
|
@@ -178,8 +184,9 @@ class Connection:
|
|
|
178
184
|
# _tls_trusted_ca_file
|
|
179
185
|
# Set to the path of the file containing trusted CA certificates for server certificate
|
|
180
186
|
# verification. If not provide, uses system truststore.
|
|
181
|
-
# _tls_client_cert_file, _tls_client_cert_key_file
|
|
187
|
+
# _tls_client_cert_file, _tls_client_cert_key_file, _tls_client_cert_key_password
|
|
182
188
|
# Set client SSL certificate.
|
|
189
|
+
# See https://docs.python.org/3/library/ssl.html#ssl.SSLContext.load_cert_chain
|
|
183
190
|
# _retry_stop_after_attempts_count
|
|
184
191
|
# The maximum number of attempts during a request retry sequence (defaults to 24)
|
|
185
192
|
# _socket_timeout
|
|
@@ -220,12 +227,25 @@ class Connection:
|
|
|
220
227
|
|
|
221
228
|
base_headers = [("User-Agent", useragent_header)]
|
|
222
229
|
|
|
230
|
+
self._ssl_options = SSLOptions(
|
|
231
|
+
# Double negation is generally a bad thing, but we have to keep backward compatibility
|
|
232
|
+
tls_verify=not kwargs.get(
|
|
233
|
+
"_tls_no_verify", False
|
|
234
|
+
), # by default - verify cert and host
|
|
235
|
+
tls_verify_hostname=kwargs.get("_tls_verify_hostname", True),
|
|
236
|
+
tls_trusted_ca_file=kwargs.get("_tls_trusted_ca_file"),
|
|
237
|
+
tls_client_cert_file=kwargs.get("_tls_client_cert_file"),
|
|
238
|
+
tls_client_cert_key_file=kwargs.get("_tls_client_cert_key_file"),
|
|
239
|
+
tls_client_cert_key_password=kwargs.get("_tls_client_cert_key_password"),
|
|
240
|
+
)
|
|
241
|
+
|
|
223
242
|
self.thrift_backend = ThriftBackend(
|
|
224
243
|
self.host,
|
|
225
244
|
self.port,
|
|
226
245
|
http_path,
|
|
227
246
|
(http_headers or []) + base_headers,
|
|
228
247
|
auth_provider,
|
|
248
|
+
ssl_options=self._ssl_options,
|
|
229
249
|
_use_arrow_native_complex_types=_use_arrow_native_complex_types,
|
|
230
250
|
**kwargs,
|
|
231
251
|
)
|
|
@@ -977,14 +997,14 @@ class Cursor:
|
|
|
977
997
|
else:
|
|
978
998
|
raise Error("There is no active result set")
|
|
979
999
|
|
|
980
|
-
def fetchall_arrow(self) -> pyarrow.Table:
|
|
1000
|
+
def fetchall_arrow(self) -> "pyarrow.Table":
|
|
981
1001
|
self._check_not_closed()
|
|
982
1002
|
if self.active_result_set:
|
|
983
1003
|
return self.active_result_set.fetchall_arrow()
|
|
984
1004
|
else:
|
|
985
1005
|
raise Error("There is no active result set")
|
|
986
1006
|
|
|
987
|
-
def fetchmany_arrow(self, size) -> pyarrow.Table:
|
|
1007
|
+
def fetchmany_arrow(self, size) -> "pyarrow.Table":
|
|
988
1008
|
self._check_not_closed()
|
|
989
1009
|
if self.active_result_set:
|
|
990
1010
|
return self.active_result_set.fetchmany_arrow(size)
|
|
@@ -1129,6 +1149,18 @@ class ResultSet:
|
|
|
1129
1149
|
self.results = results
|
|
1130
1150
|
self.has_more_rows = has_more_rows
|
|
1131
1151
|
|
|
1152
|
+
def _convert_columnar_table(self, table):
|
|
1153
|
+
column_names = [c[0] for c in self.description]
|
|
1154
|
+
ResultRow = Row(*column_names)
|
|
1155
|
+
result = []
|
|
1156
|
+
for row_index in range(table.num_rows):
|
|
1157
|
+
curr_row = []
|
|
1158
|
+
for col_index in range(table.num_columns):
|
|
1159
|
+
curr_row.append(table.get_item(col_index, row_index))
|
|
1160
|
+
result.append(ResultRow(*curr_row))
|
|
1161
|
+
|
|
1162
|
+
return result
|
|
1163
|
+
|
|
1132
1164
|
def _convert_arrow_table(self, table):
|
|
1133
1165
|
column_names = [c[0] for c in self.description]
|
|
1134
1166
|
ResultRow = Row(*column_names)
|
|
@@ -1164,14 +1196,14 @@ class ResultSet:
|
|
|
1164
1196
|
timestamp_as_object=True,
|
|
1165
1197
|
)
|
|
1166
1198
|
|
|
1167
|
-
res = df.to_numpy(na_value=None)
|
|
1199
|
+
res = df.to_numpy(na_value=None, dtype="object")
|
|
1168
1200
|
return [ResultRow(*v) for v in res]
|
|
1169
1201
|
|
|
1170
1202
|
@property
|
|
1171
1203
|
def rownumber(self):
|
|
1172
1204
|
return self._next_row_index
|
|
1173
1205
|
|
|
1174
|
-
def fetchmany_arrow(self, size: int) -> pyarrow.Table:
|
|
1206
|
+
def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
|
|
1175
1207
|
"""
|
|
1176
1208
|
Fetch the next set of rows of a query result, returning a PyArrow table.
|
|
1177
1209
|
|
|
@@ -1196,7 +1228,49 @@ class ResultSet:
|
|
|
1196
1228
|
|
|
1197
1229
|
return results
|
|
1198
1230
|
|
|
1199
|
-
def fetchall_arrow(self) -> pyarrow.Table:
|
|
1231
|
+
def merge_columnar(self, result1, result2):
|
|
1232
|
+
"""
|
|
1233
|
+
Function to merge / combining the columnar results into a single result
|
|
1234
|
+
:param result1:
|
|
1235
|
+
:param result2:
|
|
1236
|
+
:return:
|
|
1237
|
+
"""
|
|
1238
|
+
|
|
1239
|
+
if result1.column_names != result2.column_names:
|
|
1240
|
+
raise ValueError("The columns in the results don't match")
|
|
1241
|
+
|
|
1242
|
+
merged_result = [
|
|
1243
|
+
result1.column_table[i] + result2.column_table[i]
|
|
1244
|
+
for i in range(result1.num_columns)
|
|
1245
|
+
]
|
|
1246
|
+
return ColumnTable(merged_result, result1.column_names)
|
|
1247
|
+
|
|
1248
|
+
def fetchmany_columnar(self, size: int):
|
|
1249
|
+
"""
|
|
1250
|
+
Fetch the next set of rows of a query result, returning a Columnar Table.
|
|
1251
|
+
An empty sequence is returned when no more rows are available.
|
|
1252
|
+
"""
|
|
1253
|
+
if size < 0:
|
|
1254
|
+
raise ValueError("size argument for fetchmany is %s but must be >= 0", size)
|
|
1255
|
+
|
|
1256
|
+
results = self.results.next_n_rows(size)
|
|
1257
|
+
n_remaining_rows = size - results.num_rows
|
|
1258
|
+
self._next_row_index += results.num_rows
|
|
1259
|
+
|
|
1260
|
+
while (
|
|
1261
|
+
n_remaining_rows > 0
|
|
1262
|
+
and not self.has_been_closed_server_side
|
|
1263
|
+
and self.has_more_rows
|
|
1264
|
+
):
|
|
1265
|
+
self._fill_results_buffer()
|
|
1266
|
+
partial_results = self.results.next_n_rows(n_remaining_rows)
|
|
1267
|
+
results = self.merge_columnar(results, partial_results)
|
|
1268
|
+
n_remaining_rows -= partial_results.num_rows
|
|
1269
|
+
self._next_row_index += partial_results.num_rows
|
|
1270
|
+
|
|
1271
|
+
return results
|
|
1272
|
+
|
|
1273
|
+
def fetchall_arrow(self) -> "pyarrow.Table":
|
|
1200
1274
|
"""Fetch all (remaining) rows of a query result, returning them as a PyArrow table."""
|
|
1201
1275
|
results = self.results.remaining_rows()
|
|
1202
1276
|
self._next_row_index += results.num_rows
|
|
@@ -1209,12 +1283,30 @@ class ResultSet:
|
|
|
1209
1283
|
|
|
1210
1284
|
return results
|
|
1211
1285
|
|
|
1286
|
+
def fetchall_columnar(self):
|
|
1287
|
+
"""Fetch all (remaining) rows of a query result, returning them as a Columnar table."""
|
|
1288
|
+
results = self.results.remaining_rows()
|
|
1289
|
+
self._next_row_index += results.num_rows
|
|
1290
|
+
|
|
1291
|
+
while not self.has_been_closed_server_side and self.has_more_rows:
|
|
1292
|
+
self._fill_results_buffer()
|
|
1293
|
+
partial_results = self.results.remaining_rows()
|
|
1294
|
+
results = self.merge_columnar(results, partial_results)
|
|
1295
|
+
self._next_row_index += partial_results.num_rows
|
|
1296
|
+
|
|
1297
|
+
return results
|
|
1298
|
+
|
|
1212
1299
|
def fetchone(self) -> Optional[Row]:
|
|
1213
1300
|
"""
|
|
1214
1301
|
Fetch the next row of a query result set, returning a single sequence,
|
|
1215
1302
|
or None when no more data is available.
|
|
1216
1303
|
"""
|
|
1217
|
-
|
|
1304
|
+
|
|
1305
|
+
if isinstance(self.results, ColumnQueue):
|
|
1306
|
+
res = self._convert_columnar_table(self.fetchmany_columnar(1))
|
|
1307
|
+
else:
|
|
1308
|
+
res = self._convert_arrow_table(self.fetchmany_arrow(1))
|
|
1309
|
+
|
|
1218
1310
|
if len(res) > 0:
|
|
1219
1311
|
return res[0]
|
|
1220
1312
|
else:
|
|
@@ -1224,7 +1316,10 @@ class ResultSet:
|
|
|
1224
1316
|
"""
|
|
1225
1317
|
Fetch all (remaining) rows of a query result, returning them as a list of rows.
|
|
1226
1318
|
"""
|
|
1227
|
-
|
|
1319
|
+
if isinstance(self.results, ColumnQueue):
|
|
1320
|
+
return self._convert_columnar_table(self.fetchall_columnar())
|
|
1321
|
+
else:
|
|
1322
|
+
return self._convert_arrow_table(self.fetchall_arrow())
|
|
1228
1323
|
|
|
1229
1324
|
def fetchmany(self, size: int) -> List[Row]:
|
|
1230
1325
|
"""
|
|
@@ -1232,7 +1327,10 @@ class ResultSet:
|
|
|
1232
1327
|
|
|
1233
1328
|
An empty sequence is returned when no more rows are available.
|
|
1234
1329
|
"""
|
|
1235
|
-
|
|
1330
|
+
if isinstance(self.results, ColumnQueue):
|
|
1331
|
+
return self._convert_columnar_table(self.fetchmany_columnar(size))
|
|
1332
|
+
else:
|
|
1333
|
+
return self._convert_arrow_table(self.fetchmany_arrow(size))
|
|
1236
1334
|
|
|
1237
1335
|
def close(self) -> None:
|
|
1238
1336
|
"""
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
|
-
from ssl import SSLContext
|
|
4
3
|
from concurrent.futures import ThreadPoolExecutor, Future
|
|
5
4
|
from typing import List, Union
|
|
6
5
|
|
|
@@ -9,6 +8,8 @@ from databricks.sql.cloudfetch.downloader import (
|
|
|
9
8
|
DownloadableResultSettings,
|
|
10
9
|
DownloadedFile,
|
|
11
10
|
)
|
|
11
|
+
from databricks.sql.types import SSLOptions
|
|
12
|
+
|
|
12
13
|
from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
|
|
13
14
|
|
|
14
15
|
logger = logging.getLogger(__name__)
|
|
@@ -20,7 +21,7 @@ class ResultFileDownloadManager:
|
|
|
20
21
|
links: List[TSparkArrowResultLink],
|
|
21
22
|
max_download_threads: int,
|
|
22
23
|
lz4_compressed: bool,
|
|
23
|
-
|
|
24
|
+
ssl_options: SSLOptions,
|
|
24
25
|
):
|
|
25
26
|
self._pending_links: List[TSparkArrowResultLink] = []
|
|
26
27
|
for link in links:
|
|
@@ -38,7 +39,7 @@ class ResultFileDownloadManager:
|
|
|
38
39
|
self._thread_pool = ThreadPoolExecutor(max_workers=self._max_download_threads)
|
|
39
40
|
|
|
40
41
|
self._downloadable_result_settings = DownloadableResultSettings(lz4_compressed)
|
|
41
|
-
self.
|
|
42
|
+
self._ssl_options = ssl_options
|
|
42
43
|
|
|
43
44
|
def get_next_downloaded_file(
|
|
44
45
|
self, next_row_offset: int
|
|
@@ -95,7 +96,7 @@ class ResultFileDownloadManager:
|
|
|
95
96
|
handler = ResultSetDownloadHandler(
|
|
96
97
|
settings=self._downloadable_result_settings,
|
|
97
98
|
link=link,
|
|
98
|
-
|
|
99
|
+
ssl_options=self._ssl_options,
|
|
99
100
|
)
|
|
100
101
|
task = self._thread_pool.submit(handler.run)
|
|
101
102
|
self._download_tasks.append(task)
|
|
@@ -3,13 +3,12 @@ from dataclasses import dataclass
|
|
|
3
3
|
|
|
4
4
|
import requests
|
|
5
5
|
from requests.adapters import HTTPAdapter, Retry
|
|
6
|
-
from ssl import SSLContext, CERT_NONE
|
|
7
6
|
import lz4.frame
|
|
8
7
|
import time
|
|
9
8
|
|
|
10
9
|
from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
|
|
11
|
-
|
|
12
10
|
from databricks.sql.exc import Error
|
|
11
|
+
from databricks.sql.types import SSLOptions
|
|
13
12
|
|
|
14
13
|
logger = logging.getLogger(__name__)
|
|
15
14
|
|
|
@@ -66,11 +65,11 @@ class ResultSetDownloadHandler:
|
|
|
66
65
|
self,
|
|
67
66
|
settings: DownloadableResultSettings,
|
|
68
67
|
link: TSparkArrowResultLink,
|
|
69
|
-
|
|
68
|
+
ssl_options: SSLOptions,
|
|
70
69
|
):
|
|
71
70
|
self.settings = settings
|
|
72
71
|
self.link = link
|
|
73
|
-
self._ssl_context = ssl_context
|
|
72
|
+
self._ssl_options = ssl_options
|
|
74
73
|
|
|
75
74
|
def run(self) -> DownloadedFile:
|
|
76
75
|
"""
|
|
@@ -95,14 +94,13 @@ class ResultSetDownloadHandler:
|
|
|
95
94
|
session.mount("http://", HTTPAdapter(max_retries=retryPolicy))
|
|
96
95
|
session.mount("https://", HTTPAdapter(max_retries=retryPolicy))
|
|
97
96
|
|
|
98
|
-
ssl_verify = self._ssl_context.verify_mode != CERT_NONE
|
|
99
|
-
|
|
100
97
|
try:
|
|
101
98
|
# Get the file via HTTP request
|
|
102
99
|
response = session.get(
|
|
103
100
|
self.link.fileLink,
|
|
104
101
|
timeout=self.settings.download_timeout,
|
|
105
|
-
verify=ssl_verify,
|
|
102
|
+
verify=self._ssl_options.tls_verify,
|
|
103
|
+
# TODO: Pass cert from `self._ssl_options`
|
|
106
104
|
)
|
|
107
105
|
response.raise_for_status()
|
|
108
106
|
|